Skip to content
20 changes: 10 additions & 10 deletions rust_snuba/src/processors/eap_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,7 @@ impl TryFrom<TraceItem> for EAPItem {
Some(Value::DoubleValue(double)) => eap_item.attributes.insert_float(key, double),
Some(Value::IntValue(int)) => eap_item.attributes.insert_int(key, int),
Some(Value::BoolValue(bool)) => eap_item.attributes.insert_bool(key, bool),
Some(Value::ArrayValue(array)) => {
if get_str_config(INSERT_ARRAYS_CONFIG)
.ok()
.flatten()
.unwrap_or("0".to_string())
== "1"
{
eap_item.attributes.insert_array(key, array)
}
}
Some(Value::ArrayValue(array)) => eap_item.attributes.insert_array(key, array),
Some(Value::BytesValue(_)) => (),
Some(Value::KvlistValue(_)) => (),
None => (),
Expand Down Expand Up @@ -229,6 +220,15 @@ impl AttributeMap {
}

pub fn insert_array(&mut self, k: String, v: ArrayValue) {
if get_str_config(INSERT_ARRAYS_CONFIG)
.ok()
.flatten()
.unwrap_or("0".to_string())
!= "1"
{
return;
}

let mut values: Vec<EAPValue> = Vec::default();
for value in v.values {
match value.value {
Expand Down
137 changes: 109 additions & 28 deletions snuba/web/rpc/v1/endpoint_get_trace.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import random
import uuid
from datetime import datetime
Expand All @@ -12,7 +13,11 @@
GetTraceResponse,
)
from sentry_protos.snuba.v1.request_common_pb2 import PageToken, TraceItemType
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
Array,
AttributeKey,
AttributeValue,
)
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
AndFilter,
ComparisonFilter,
Expand Down Expand Up @@ -58,6 +63,8 @@
"project_id",
"trace_id",
"sampling_factor",
"attributes_bool",
"attributes_int",
]
APPLY_FINAL_ROLLOUT_PERCENTAGE_CONFIG_KEY = "EndpointGetTrace.apply_final_rollout_percentage"

Expand Down Expand Up @@ -204,21 +211,29 @@ def _build_query(
else:
selected_columns += [
SelectedExpression(
name=("attributes_string"),
name="attributes_string",
expression=FunctionCall(
("attributes_string"),
"mapConcat",
tuple(column(f"attributes_string_{i}") for i in range(40)),
),
),
SelectedExpression(
name=("attributes_float"),
name="attributes_float",
expression=FunctionCall(
("attributes_float"),
"mapConcat",
tuple(column(f"attributes_float_{i}") for i in range(40)),
),
),
SelectedExpression(
name="attributes_array",
expression=FunctionCall(
"attributes_array",
"toJSONString",
(column("attributes_array"),),
),
),
]
selected_columns.extend(
map(
Expand Down Expand Up @@ -363,46 +378,80 @@ def _build_snuba_request(
)


def convert_to_attribute_value(value: Any) -> AttributeValue:
    """Wrap a plain Python value in the protobuf AttributeValue oneof.

    Lists are converted recursively into an Array of AttributeValues;
    datetimes are carried as float seconds since the epoch.

    Raises:
        BadSnubaRPCRequestException: if the value's type has no mapping.
    """
    # bool must be tested before int: bool is a subclass of int in Python,
    # so the int branch would otherwise swallow booleans.
    if isinstance(value, bool):
        return AttributeValue(val_bool=value)
    if isinstance(value, int):
        return AttributeValue(val_int=value)
    if isinstance(value, float):
        return AttributeValue(val_double=value)
    if isinstance(value, str):
        return AttributeValue(val_str=value)
    if isinstance(value, datetime):
        return AttributeValue(val_double=value.timestamp())
    if isinstance(value, list):
        converted = [convert_to_attribute_value(item) for item in value]
        return AttributeValue(val_array=Array(values=converted))
    raise BadSnubaRPCRequestException(f"data type unknown: {type(value)}")


def _value_to_attribute(key: str, value: Any) -> tuple[AttributeKey, AttributeValue]:
if isinstance(value, int):
if isinstance(value, bool):
return (
AttributeKey(
name=key,
type=AttributeKey.Type.TYPE_INT,
type=AttributeKey.Type.TYPE_BOOLEAN,
),
AttributeValue(
val_int=value,
convert_to_attribute_value(value),
)
elif isinstance(value, int):
return (
AttributeKey(
name=key,
type=AttributeKey.Type.TYPE_INT,
),
convert_to_attribute_value(value),
)
elif isinstance(value, float):
return (
AttributeKey(
name=key,
type=AttributeKey.Type.TYPE_DOUBLE,
),
AttributeValue(
val_double=value,
),
convert_to_attribute_value(value),
)
elif isinstance(value, str):
return (
AttributeKey(
name=key,
type=AttributeKey.Type.TYPE_STRING,
),
AttributeValue(
val_str=value,
),
convert_to_attribute_value(value),
)
elif isinstance(value, list):
return (
AttributeKey(name=key, type=AttributeKey.Type.TYPE_ARRAY),
convert_to_attribute_value(value),
)
elif isinstance(value, datetime):
return (
AttributeKey(
name=key,
type=AttributeKey.Type.TYPE_DOUBLE,
),
AttributeValue(
val_double=value.timestamp(),
),
convert_to_attribute_value(value),
)
else:
raise BadSnubaRPCRequestException(f"data type unknown: {type(value)}")
Expand All @@ -418,6 +467,25 @@ def _value_to_attribute(key: str, value: Any) -> tuple[AttributeKey, AttributeVa
)


def _transform_array_value(value: dict[str, str]) -> Any:
for t, v in value.items():
if t == "Int":
return int(v)
if t == "Double":
return float(v)
if t in {"String", "Bool"}:
return v
raise BadSnubaRPCRequestException(f"array value type unknown: {type(v)}")


def _process_arrays(raw: str) -> dict[str, list[Any]]:
parsed = json.loads(raw)
arrays = {}
for key, values in parsed.items():
arrays[key] = [_transform_array_value(v) for v in values]
return arrays
Comment on lines +481 to +486
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Missing NULL handling for attributes_array column when toJSONString(NULL) returns "null" causes AttributeError.
Severity: HIGH | Confidence: High

🔍 Detailed Analysis

When the attributes_array column from ClickHouse is NULL, toJSONString(NULL) returns the JSON string "null". The code at line 504, arrays = row.pop("attributes_array", "{}") or "{}", does not correctly handle this. json.loads("null") results in Python's None, which is then assigned to parsed. Subsequently, parsed.items() at line 484 will raise an AttributeError: 'NoneType' object has no attribute 'items', leading to a crash.

💡 Suggested Fix

After parsed = json.loads(raw), add a check to ensure parsed is not None. If parsed is None, return an empty dictionary {} instead of proceeding to iterate over parsed.items().

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: snuba/web/rpc/v1/endpoint_get_trace.py#L481-L486

Potential issue: When the `attributes_array` column from ClickHouse is `NULL`,
`toJSONString(NULL)` returns the JSON string `"null"`. The code at line 504, `arrays =
row.pop("attributes_array", "{}") or "{}"`, does not correctly handle this.
`json.loads("null")` results in Python's `None`, which is then assigned to `parsed`.
Subsequently, `parsed.items()` at line 484 will raise an `AttributeError: 'NoneType'
object has no attribute 'items'`, leading to a crash.

Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID: 4393405



def _process_results(
data: Iterable[Dict[str, Any]],
) -> ProcessedResults:
Expand All @@ -433,6 +501,11 @@ def _process_results(
for row in data:
id = row.pop("id")
ts = row.pop("timestamp")
arrays = row.pop("attributes_array", "{}") or "{}"
# We want to merge these values after to overwrite potential floats
# with the same name.
booleans = row.pop("attributes_bool", {}) or {}
integers = row.pop("attributes_int", {}) or {}
last_seen_timestamp_precise = float(ts)
last_seen_id = id

Expand All @@ -441,29 +514,37 @@ def _process_results(
# then transform to nanoseconds
timestamp.FromNanoseconds(int(ts * 1e6) * 1000)

attributes: list[GetTraceResponse.Item.Attribute] = []
attributes: dict[str, GetTraceResponse.Item.Attribute] = {}

def add_attribute(key: str, value: Any) -> None:
attribute_key, attribute_value = _value_to_attribute(key, value)
attributes.append(
GetTraceResponse.Item.Attribute(
key=attribute_key,
value=attribute_value,
)
attributes[key] = GetTraceResponse.Item.Attribute(
key=attribute_key,
value=attribute_value,
)

for key, value in row.items():
if isinstance(value, dict):
for k, v in value.items():
add_attribute(k, v)
for row_key, row_value in row.items():
if isinstance(row_value, dict):
for column_key, column_value in row_value.items():
add_attribute(column_key, column_value)
else:
add_attribute(key, value)
add_attribute(row_key, row_value)

attributes_array = _process_arrays(arrays)
for array_key, array_value in attributes_array.items():
add_attribute(array_key, array_value)

for bool_key, bool_value in booleans.items():
add_attribute(bool_key, bool_value)

for int_key, int_value in integers.items():
add_attribute(int_key, int_value)

item = GetTraceResponse.Item(
id=id,
timestamp=timestamp,
attributes=sorted(
attributes,
attributes.values(),
key=attrgetter("key.name"),
),
)
Expand Down
12 changes: 12 additions & 0 deletions tests/datasets/configuration/test_storage_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any

from snuba.clickhouse.columns import (
JSON,
Array,
Bool,
Column,
Expand Down Expand Up @@ -196,6 +197,13 @@ def test_column_parser(self) -> None:
"schema_modifiers": ["nullable"],
},
},
{
"name": "json_col",
"type": "JSON",
"args": {
"max_dynamic_paths": 128,
},
},
]

expected_python_columns = [
Expand All @@ -222,6 +230,10 @@ def test_column_parser(self) -> None:
SchemaModifiers(nullable=True, readonly=False),
),
),
Column(
"json_col",
JSON(max_dynamic_paths=128),
),
]

assert parse_columns(serialized_columns) == expected_python_columns
Loading
Loading