Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/boring_semantic_layer/measure_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,47 @@
from toolz import curry


def _has_prefixed_columns(tbl, name: str) -> bool:
"""Check if table has columns with the given prefix (e.g., 'flights.' prefix)."""
if not hasattr(tbl, "columns"):
return False
prefix = f"{name}."
return any(c.startswith(prefix) for c in tbl.columns)


class _ColumnPrefixProxy:
"""Proxy for navigating prefixed column names on joined ibis tables.

Supports chained attribute access like ``t.flights.carrier`` which resolves
to ``table["flights.carrier"]`` when the table has columns with the
``"flights."`` prefix (typical after joins).
"""

__slots__ = ("_tbl", "_prefix")

def __init__(self, tbl, prefix: str):
object.__setattr__(self, "_tbl", tbl)
object.__setattr__(self, "_prefix", prefix)

def __getattr__(self, name: str):
full_name = f"{self._prefix}.{name}"
if hasattr(self._tbl, "columns") and full_name in self._tbl.columns:
return self._tbl[full_name]
raise AttributeError(
f"No column '{full_name}' found on the table. "
f"Available columns with prefix '{self._prefix}.': "
f"{[c for c in (self._tbl.columns if hasattr(self._tbl, 'columns') else []) if c.startswith(self._prefix + '.')]}"
)

def __getitem__(self, name: str):
full_name = f"{self._prefix}.{name}"
if hasattr(self._tbl, "columns") and full_name in self._tbl.columns:
return self._tbl[full_name]
raise KeyError(
f"No column '{full_name}' found on the table."
)


class _PendingMethodCall:
"""Captures a method access on a calc-measure AST node, waiting for ``()``."""

Expand Down Expand Up @@ -268,6 +309,8 @@ def __getattr__(self, name: str):
)

if self.post_agg:
if _has_prefixed_columns(self.tbl, name):
return _ColumnPrefixProxy(self.tbl, name)
return _resolve_column_short_name(self.tbl, name)

maybe_measure = _resolve_measure_name(name, self.known, self.known_set).map(MeasureRef)
Expand All @@ -277,6 +320,10 @@ def __getattr__(self, name: str):
if hasattr(self.tbl, "columns") and name in self.tbl.columns:
return DeferredColumn(name, self.tbl)

# Support prefix navigation for joined tables (e.g., t.flights.carrier)
if _has_prefixed_columns(self.tbl, name):
return _ColumnPrefixProxy(self.tbl, name)

return _resolve_column_short_name(self.tbl, name)

def __getitem__(self, name: str):
Expand Down Expand Up @@ -337,6 +384,10 @@ def __getattr__(self, name: str):
proxy = create_table_proxy(self.tbl)
return getattr(proxy, name)

# Support prefix navigation for joined tables (e.g., t.flights.carrier)
if _has_prefixed_columns(self.tbl, name):
return _ColumnPrefixProxy(self.tbl, name)

return getattr(self.tbl, name)

def __getitem__(self, name: str):
Expand Down
71 changes: 69 additions & 2 deletions src/boring_semantic_layer/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,12 @@ def resolve_one(dim_name: str, current_tbl: ir.Table) -> ir.Table:
try:
while True:
try:
dim_expr = merged_dimensions[dim_name](current_tbl)
dim_fn = merged_dimensions[dim_name]
dim_expr = (
dim_fn(current_tbl, _dims=merged_dimensions)
if isinstance(dim_fn, Dimension)
else dim_fn(current_tbl)
)
return current_tbl.mutate(**{dim_name: dim_expr})
except Exception as exc:
missing = _extract_missing_column_name(exc)
Expand Down Expand Up @@ -844,6 +849,57 @@ def _format_column_error(e: AttributeError, table: ir.Table) -> str:
return " ".join(parts)


class _DimPrefixProxy:
"""Resolves ``proxy.column`` to ``dims["prefix.column"](table)``."""

__slots__ = ("_tbl", "_dims", "_prefix")

def __init__(self, tbl, dims: dict, prefix: str):
object.__setattr__(self, "_tbl", tbl)
object.__setattr__(self, "_dims", dims)
object.__setattr__(self, "_prefix", prefix)

def __getattr__(self, name: str):
full_name = f"{self._prefix}.{name}"
if full_name in self._dims:
return self._dims[full_name](self._tbl)
raise AttributeError(
f"No dimension '{full_name}' found. "
f"Available dimensions with prefix '{self._prefix}.': "
f"{[k for k in self._dims if k.startswith(self._prefix + '.')]}"
)


class _DimensionTableProxy:
"""Proxy that wraps an ibis table to support model-prefix navigation.

Allows dimension lambdas like ``lambda t: t.flights.carrier`` to work on
joined tables by resolving ``t.flights.carrier`` through the merged
dimension map (``dims["flights.carrier"](table)``).
"""

__slots__ = ("_tbl", "_dims")

def __init__(self, tbl, dims: dict):
object.__setattr__(self, "_tbl", tbl)
object.__setattr__(self, "_dims", dims)

def __getattr__(self, name: str):
prefix = f"{name}."
if any(k.startswith(prefix) for k in self._dims):
return _DimPrefixProxy(self._tbl, self._dims, name)
return getattr(self._tbl, name)

def __getitem__(self, name: str):
if name in self._dims:
return self._dims[name](self._tbl)
return self._tbl[name]

@property
def columns(self):
return self._tbl.columns


@frozen(kw_only=True, slots=True)
class Dimension:
expr: Callable[[ir.Table], ir.Value] | Deferred
Expand All @@ -853,10 +909,21 @@ class Dimension:
is_event_timestamp: bool = False
smallest_time_grain: str | None = None

def __call__(self, table: ir.Table) -> ir.Value:
def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value:
try:
return self.expr.resolve(table) if _is_deferred(self.expr) else self.expr(table)
except AttributeError as e:
# Retry with a prefix-aware proxy for joined tables where
# model prefixes are used (e.g., lambda t: t.flights.carrier)
if _dims and not _is_deferred(self.expr) and callable(self.expr):
try:
proxy = _DimensionTableProxy(table, _dims)
return self.expr(proxy)
except AttributeError:
# Re-raise proxy errors so dependency resolution can detect them
raise
except Exception:
pass
# Provide helpful error for missing columns
if "'Table' object has no attribute" in str(
e
Expand Down
14 changes: 12 additions & 2 deletions src/boring_semantic_layer/projection_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,22 @@ def _extract_requirement_for_key(
if cols:
return apply_requirements_to_tables(current_reqs, table_names, cols)
elif isinstance(result, Failure):
# If the failure is about a dimension validation error, raise it immediately
# If the failure is about a dimension validation error, re-raise
# unless the dimension uses model-prefixed access (e.g., t.flights.carrier)
# which legitimately fails on the raw table but works with a proxy
exc = result.failure()
if isinstance(
exc, AttributeError
) and "Dimension expression references non-existent column" in str(exc):
raise exc
# Check if the missing column could be a model prefix
err_str = str(exc)
is_prefix = False
for tname in table_names:
if f"'{tname}'" in err_str:
is_prefix = True
break
if not is_prefix:
raise exc
return current_reqs

# If not a dimension and we have a table prefix, assume col_name is a direct column reference
Expand Down
36 changes: 36 additions & 0 deletions src/boring_semantic_layer/tests/test_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,7 @@ def test_from_config_with_filter_and_joins():
assert len(result) == 3


# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Issue #114: self-joins in YAML
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1370,3 +1371,38 @@ def test_yaml_self_joins(duckdb_conn):
assert len(df) == 3
assert "origin_airport.city" in df.columns
assert "destination_airport.city" in df.columns


# ---------------------------------------------------------------------------
# Issue #136: model prefix in with_dimensions/with_measures
# ---------------------------------------------------------------------------


def test_model_prefix_in_with_dimensions(duckdb_conn):
    """A dimension added after a join may address the joined model by prefix (#136)."""
    from boring_semantic_layer import to_semantic_table

    # Two tiny tables: carriers is the lookup side, flights the fact side.
    carriers_tbl = duckdb_conn.create_table(
        "carriers_136",
        {"code": ["AA", "UA"], "name": ["American", "United"]},
    )
    flights_tbl = duckdb_conn.create_table(
        "flights_136",
        {"carrier": ["AA", "UA", "AA"], "distance": [100, 200, 300]},
    )

    carriers_model = to_semantic_table(carriers_tbl, name="carriers")
    carriers_model = carriers_model.with_dimensions(
        code=lambda t: t.code,
        name=lambda t: t.name,
    )

    flights_model = to_semantic_table(flights_tbl, name="flights")
    flights_model = flights_model.with_dimensions(carrier=lambda t: t.carrier)
    flights_model = flights_model.with_measures(
        total_distance=lambda t: t.distance.sum()
    )

    joined_model = flights_model.join_one(
        carriers_model, on=lambda l, r: l.carrier == r.code
    )

    # The dimension under test reaches into the joined model via ``t.carriers``.
    with_carrier_name = joined_model.with_dimensions(
        carrier_name=lambda t: t.carriers.name
    )
    grouped = with_carrier_name.group_by("carrier_name")
    df = grouped.aggregate("total_distance").execute()

    assert len(df) == 2
    assert "carrier_name" in df.columns
    assert set(df["carrier_name"]) == {"American", "United"}
Loading