|
8 | 8 | capture_internal_exception, |
9 | 9 | disable_capture_event, |
10 | 10 | format_timestamp, |
11 | | - json_dumps, |
12 | 11 | safe_repr, |
13 | 12 | strip_string, |
14 | 13 | ) |
15 | | - |
16 | | -import sentry_sdk.utils |
17 | | - |
18 | 14 | from sentry_sdk._compat import ( |
19 | 15 | text_type, |
20 | 16 | PY2, |
|
23 | 19 | iteritems, |
24 | 20 | binary_sequence_types, |
25 | 21 | ) |
26 | | - |
27 | 22 | from sentry_sdk._types import TYPE_CHECKING |
28 | 23 |
|
29 | 24 | if TYPE_CHECKING: |
30 | | - from datetime import timedelta |
31 | | - |
32 | 25 | from types import TracebackType |
33 | 26 |
|
34 | 27 | from typing import Any |
|
37 | 30 | from typing import Dict |
38 | 31 | from typing import List |
39 | 32 | from typing import Optional |
40 | | - from typing import Tuple |
41 | 33 | from typing import Type |
42 | 34 | from typing import Union |
43 | 35 |
|
@@ -120,12 +112,11 @@ def __exit__( |
120 | 112 | self._ids.pop(id(self._objs.pop()), None) |
121 | 113 |
|
122 | 114 |
|
123 | | -def serialize(event, smart_transaction_trimming=False, **kwargs): |
124 | | - # type: (Event, bool, **Any) -> Event |
| 115 | +def serialize(event, **kwargs): |
| 116 | + # type: (Event, **Any) -> Event |
125 | 117 | memo = Memo() |
126 | 118 | path = [] # type: List[Segment] |
127 | 119 | meta_stack = [] # type: List[Dict[str, Any]] |
128 | | - span_description_bytes = [] # type: List[int] |
129 | 120 |
|
130 | 121 | def _annotate(**meta): |
131 | 122 | # type: (**Any) -> None |
@@ -365,113 +356,23 @@ def _serialize_node_impl( |
365 | 356 | if not isinstance(obj, string_types): |
366 | 357 | obj = safe_repr(obj) |
367 | 358 |
|
368 | | - # Allow span descriptions to be longer than other strings. |
369 | | - # |
370 | | - # For database auto-instrumented spans, the description contains |
371 | | - # potentially long SQL queries that are most useful when not truncated. |
372 | | - # Because arbitrarily large events may be discarded by the server as a |
373 | | - # protection mechanism, we dynamically limit the description length |
374 | | - # later in _truncate_span_descriptions. |
375 | | - if ( |
376 | | - smart_transaction_trimming |
377 | | - and len(path) == 3 |
378 | | - and path[0] == "spans" |
379 | | - and path[-1] == "description" |
380 | | - ): |
381 | | - span_description_bytes.append(len(obj)) |
| 359 | + is_span_description = ( |
| 360 | + len(path) == 3 and path[0] == "spans" and path[-1] == "description" |
| 361 | + ) |
| 362 | + if is_span_description: |
382 | 363 | return obj |
383 | | - return _flatten_annotated(strip_string(obj)) |
384 | 364 |
|
385 | | - def _truncate_span_descriptions(serialized_event, event, excess_bytes): |
386 | | - # type: (Event, Event, int) -> None |
387 | | - """ |
388 | | - Modifies serialized_event in-place trying to remove excess_bytes from |
389 | | - span descriptions. The original event is used read-only to access the |
390 | | - span timestamps (represented as RFC 3339-formatted strings in |
391 | | - serialized_event). |
392 | | -
|
393 | | - It uses heuristics to prioritize preserving the description of spans |
394 | | - that might be the most interesting ones in terms of understanding and |
395 | | - optimizing performance. |
396 | | - """ |
397 | | - # When truncating a description, preserve a small prefix. |
398 | | - min_length = 10 |
399 | | - |
400 | | - def shortest_duration_longest_description_first(args): |
401 | | - # type: (Tuple[int, Span]) -> Tuple[timedelta, int] |
402 | | - i, serialized_span = args |
403 | | - span = event["spans"][i] |
404 | | - now = datetime.utcnow() |
405 | | - start = span.get("start_timestamp") or now |
406 | | - end = span.get("timestamp") or now |
407 | | - duration = end - start |
408 | | - description = serialized_span.get("description") or "" |
409 | | - return (duration, -len(description)) |
410 | | - |
411 | | - # Note: for simplicity we sort spans by exact duration and description |
412 | | - # length. If ever needed, we could have a more involved heuristic, e.g. |
413 | | - # replacing exact durations with "buckets" and/or looking at other span |
414 | | - # properties. |
415 | | - path.append("spans") |
416 | | - for i, span in sorted( |
417 | | - enumerate(serialized_event.get("spans") or []), |
418 | | - key=shortest_duration_longest_description_first, |
419 | | - ): |
420 | | - description = span.get("description") or "" |
421 | | - if len(description) <= min_length: |
422 | | - continue |
423 | | - excess_bytes -= len(description) - min_length |
424 | | - path.extend([i, "description"]) |
425 | | - # Note: the last time we call strip_string we could preserve a few |
426 | | - # more bytes up to a total length of MAX_EVENT_BYTES. Since that's |
427 | | - # not strictly required, we leave it out for now for simplicity. |
428 | | - span["description"] = _flatten_annotated( |
429 | | - strip_string(description, max_length=min_length) |
430 | | - ) |
431 | | - del path[-2:] |
432 | | - del meta_stack[len(path) + 1 :] |
433 | | - |
434 | | - if excess_bytes <= 0: |
435 | | - break |
436 | | - path.pop() |
437 | | - del meta_stack[len(path) + 1 :] |
| 365 | + return _flatten_annotated(strip_string(obj)) |
438 | 366 |
|
| 367 | + # |
| 368 | + # Start of serialize() function |
| 369 | + # |
439 | 370 | disable_capture_event.set(True) |
440 | 371 | try: |
441 | | - rv = _serialize_node(event, **kwargs) |
442 | | - if meta_stack and isinstance(rv, dict): |
443 | | - rv["_meta"] = meta_stack[0] |
444 | | - |
445 | | - sum_span_description_bytes = sum(span_description_bytes) |
446 | | - if smart_transaction_trimming and sum_span_description_bytes > 0: |
447 | | - span_count = len(event.get("spans") or []) |
448 | | - # This is an upper bound of how many bytes all descriptions would |
449 | | - # consume if the usual string truncation in _serialize_node_impl |
450 | | - # would have taken place, not accounting for the metadata attached |
451 | | - # as event["_meta"]. |
452 | | - descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH |
453 | | - |
454 | | - # If by not truncating descriptions we ended up with more bytes than |
455 | | - # per the usual string truncation, check if the event is too large |
456 | | - # and we need to truncate some descriptions. |
457 | | - # |
458 | | - # This is guarded with an if statement to avoid JSON-encoding the |
459 | | - # event unnecessarily. |
460 | | - if sum_span_description_bytes > descriptions_budget_bytes: |
461 | | - original_bytes = len(json_dumps(rv)) |
462 | | - excess_bytes = original_bytes - MAX_EVENT_BYTES |
463 | | - if excess_bytes > 0: |
464 | | - # Event is too large, will likely be discarded by the |
465 | | - # server. Trim it down before sending. |
466 | | - _truncate_span_descriptions(rv, event, excess_bytes) |
467 | | - |
468 | | - # Span descriptions truncated, set or reset _meta. |
469 | | - # |
470 | | - # We run the same code earlier because we want to account |
471 | | - # for _meta when calculating original_bytes, the number of |
472 | | - # bytes in the JSON-encoded event. |
473 | | - if meta_stack and isinstance(rv, dict): |
474 | | - rv["_meta"] = meta_stack[0] |
475 | | - return rv |
| 372 | + serialized_event = _serialize_node(event, **kwargs) |
| 373 | + if meta_stack and isinstance(serialized_event, dict): |
| 374 | + serialized_event["_meta"] = meta_stack[0] |
| 375 | + |
| 376 | + return serialized_event |
476 | 377 | finally: |
477 | 378 | disable_capture_event.set(False) |
0 commit comments