diff --git a/src/boring_semantic_layer/measure_scope.py b/src/boring_semantic_layer/measure_scope.py
index 8a740da..5c53f26 100644
--- a/src/boring_semantic_layer/measure_scope.py
+++ b/src/boring_semantic_layer/measure_scope.py
@@ -8,6 +8,48 @@
 from toolz import curry
 
 
+def _has_prefixed_columns(tbl, name: str) -> bool:
+    """Check if table has columns with the given prefix (e.g., 'flights.' prefix)."""
+    if not hasattr(tbl, "columns"):
+        return False
+    prefix = f"{name}."
+    return any(c.startswith(prefix) for c in tbl.columns)
+
+
+class _ColumnPrefixProxy:
+    """Proxy for navigating prefixed column names on joined ibis tables.
+
+    Supports chained attribute access like ``t.flights.carrier`` which resolves
+    to ``table["flights.carrier"]`` when the table has columns with the
+    ``"flights."`` prefix (typical after joins).
+    """
+
+    __slots__ = ("_tbl", "_prefix")
+
+    def __init__(self, tbl, prefix: str):
+        object.__setattr__(self, "_tbl", tbl)
+        object.__setattr__(self, "_prefix", prefix)
+
+    def __getattr__(self, name: str):
+        full_name = f"{self._prefix}.{name}"
+        if hasattr(self._tbl, "columns") and full_name in self._tbl.columns:
+            return self._tbl[full_name]
+        prefix = f"{self._prefix}."
+        available = [c for c in getattr(self._tbl, "columns", ()) if c.startswith(prefix)]
+        raise AttributeError(
+            f"No column '{full_name}' found on the table. "
+            f"Available columns with prefix '{prefix}': {available}"
+        )
+
+    def __getitem__(self, name: str):
+        full_name = f"{self._prefix}.{name}"
+        if hasattr(self._tbl, "columns") and full_name in self._tbl.columns:
+            return self._tbl[full_name]
+        raise KeyError(
+            f"No column '{full_name}' found on the table."
+        )
+
+
 class _PendingMethodCall:
     """Captures a method access on a calc-measure AST node, waiting for ``()``."""
 
@@ -268,6 +310,8 @@ def __getattr__(self, name: str):
             )
 
         if self.post_agg:
+            if _has_prefixed_columns(self.tbl, name) and name not in self.tbl.columns:
+                return _ColumnPrefixProxy(self.tbl, name)
             return _resolve_column_short_name(self.tbl, name)
 
         maybe_measure = _resolve_measure_name(name, self.known, self.known_set).map(MeasureRef)
@@ -277,6 +321,10 @@ def __getattr__(self, name: str):
         if hasattr(self.tbl, "columns") and name in self.tbl.columns:
             return DeferredColumn(name, self.tbl)
 
+        # Support prefix navigation for joined tables (e.g., t.flights.carrier)
+        if _has_prefixed_columns(self.tbl, name):
+            return _ColumnPrefixProxy(self.tbl, name)
+
         return _resolve_column_short_name(self.tbl, name)
 
     def __getitem__(self, name: str):
@@ -337,6 +385,10 @@ def __getattr__(self, name: str):
             proxy = create_table_proxy(self.tbl)
             return getattr(proxy, name)
 
+        # Support prefix navigation for joined tables (e.g., t.flights.carrier)
+        if _has_prefixed_columns(self.tbl, name) and name not in self.tbl.columns:
+            return _ColumnPrefixProxy(self.tbl, name)
+
         return getattr(self.tbl, name)
 
     def __getitem__(self, name: str):
diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py
index 6c24d91..980b949 100644
--- a/src/boring_semantic_layer/ops.py
+++ b/src/boring_semantic_layer/ops.py
@@ -490,7 +490,12 @@ def resolve_one(dim_name: str, current_tbl: ir.Table) -> ir.Table:
         try:
             while True:
                 try:
-                    dim_expr = merged_dimensions[dim_name](current_tbl)
+                    dim_fn = merged_dimensions[dim_name]
+                    dim_expr = (
+                        dim_fn(current_tbl, _dims=merged_dimensions)
+                        if isinstance(dim_fn, Dimension)
+                        else dim_fn(current_tbl)
+                    )
                     return current_tbl.mutate(**{dim_name: dim_expr})
                 except Exception as exc:
                     missing = _extract_missing_column_name(exc)
@@ -844,6 +849,57 @@ def _format_column_error(e: AttributeError, table: ir.Table) -> str:
     return " ".join(parts)
 
 
+class _DimPrefixProxy:
+    """Resolves ``proxy.column`` to ``dims["prefix.column"](table)``."""
+
+    __slots__ = ("_tbl", "_dims", "_prefix")
+
+    def __init__(self, tbl, dims: dict, prefix: str):
+        object.__setattr__(self, "_tbl", tbl)
+        object.__setattr__(self, "_dims", dims)
+        object.__setattr__(self, "_prefix", prefix)
+
+    def __getattr__(self, name: str):
+        full_name = f"{self._prefix}.{name}"
+        if full_name in self._dims:
+            return self._dims[full_name](self._tbl)
+        raise AttributeError(
+            f"No dimension '{full_name}' found. "
+            f"Available dimensions with prefix '{self._prefix}.': "
+            f"{[k for k in self._dims if k.startswith(self._prefix + '.')]}"
+        )
+
+
+class _DimensionTableProxy:
+    """Proxy that wraps an ibis table to support model-prefix navigation.
+
+    Allows dimension lambdas like ``lambda t: t.flights.carrier`` to work on
+    joined tables by resolving ``t.flights.carrier`` through the merged
+    dimension map (``dims["flights.carrier"](table)``).
+    """
+
+    __slots__ = ("_tbl", "_dims")
+
+    def __init__(self, tbl, dims: dict):
+        object.__setattr__(self, "_tbl", tbl)
+        object.__setattr__(self, "_dims", dims)
+
+    def __getattr__(self, name: str):
+        prefix = f"{name}."
+        if any(k.startswith(prefix) for k in self._dims):
+            return _DimPrefixProxy(self._tbl, self._dims, name)
+        return getattr(self._tbl, name)
+
+    def __getitem__(self, name: str):
+        if name in self._dims:
+            return self._dims[name](self._tbl)
+        return self._tbl[name]
+
+    @property
+    def columns(self):
+        return self._tbl.columns
+
+
 @frozen(kw_only=True, slots=True)
 class Dimension:
     expr: Callable[[ir.Table], ir.Value] | Deferred
@@ -853,10 +909,22 @@ class Dimension:
     is_event_timestamp: bool = False
     smallest_time_grain: str | None = None
 
-    def __call__(self, table: ir.Table) -> ir.Value:
+    def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value:
         try:
             return self.expr.resolve(table) if _is_deferred(self.expr) else self.expr(table)
         except AttributeError as e:
+            # Retry with a prefix-aware proxy for joined tables where
+            # model prefixes are used (e.g., lambda t: t.flights.carrier)
+            if _dims and not _is_deferred(self.expr) and callable(self.expr):
+                try:
+                    proxy = _DimensionTableProxy(table, _dims)
+                    return self.expr(proxy)
+                except AttributeError:
+                    # Re-raise proxy errors so dependency resolution can detect them
+                    raise
+                except Exception:
+                    # Best effort only: fall through and report the original error
+                    pass
             # Provide helpful error for missing columns
             if "'Table' object has no attribute" in str(
                 e
diff --git a/src/boring_semantic_layer/projection_utils.py b/src/boring_semantic_layer/projection_utils.py
index 6eec74d..8651be4 100644
--- a/src/boring_semantic_layer/projection_utils.py
+++ b/src/boring_semantic_layer/projection_utils.py
@@ -227,12 +227,17 @@ def _extract_requirement_for_key(
             if cols:
                 return apply_requirements_to_tables(current_reqs, table_names, cols)
         elif isinstance(result, Failure):
-            # If the failure is about a dimension validation error, raise it immediately
+            # If the failure is about a dimension validation error, re-raise
+            # unless the dimension uses model-prefixed access (e.g., t.flights.carrier)
+            # which legitimately fails on the raw table but works with a proxy
             exc = result.failure()
             if isinstance(
                 exc, AttributeError
             ) and "Dimension expression references non-existent column" in str(exc):
-                raise exc
+                # Check if the missing column could be a model prefix
+                err_str = str(exc)
+                if not any(f"'{tname}'" in err_str for tname in table_names):
+                    raise exc
             return current_reqs
 
     # If not a dimension and we have a table prefix, assume col_name is a direct column reference
diff --git a/src/boring_semantic_layer/tests/test_yaml.py b/src/boring_semantic_layer/tests/test_yaml.py
index 3c72577..a6b64ab 100644
--- a/src/boring_semantic_layer/tests/test_yaml.py
+++ b/src/boring_semantic_layer/tests/test_yaml.py
@@ -1370,3 +1370,39 @@ def test_yaml_self_joins(duckdb_conn):
     assert len(df) == 3
     assert "origin_airport.city" in df.columns
     assert "destination_airport.city" in df.columns
+
+
+# ---------------------------------------------------------------------------
+# Issue #136: model prefix in with_dimensions/with_measures
+# ---------------------------------------------------------------------------
+
+
+def test_model_prefix_in_with_dimensions(duckdb_conn):
+    """Test model prefix works in with_dimensions after join (#136)."""
+    from boring_semantic_layer import to_semantic_table
+
+    carriers = duckdb_conn.create_table(
+        "carriers_136", {"code": ["AA", "UA"], "name": ["American", "United"]}
+    )
+    flights = duckdb_conn.create_table(
+        "flights_136", {"carrier": ["AA", "UA", "AA"], "distance": [100, 200, 300]}
+    )
+
+    carriers_st = to_semantic_table(carriers, name="carriers").with_dimensions(
+        code=lambda t: t.code, name=lambda t: t.name
+    )
+    flights_st = (
+        to_semantic_table(flights, name="flights")
+        .with_dimensions(carrier=lambda t: t.carrier)
+        .with_measures(total_distance=lambda t: t.distance.sum())
+    )
+
+    joined = flights_st.join_one(carriers_st, on=lambda l, r: l.carrier == r.code)
+
+    # Model prefix should work in with_dimensions after join
+    result = joined.with_dimensions(carrier_name=lambda t: t.carriers.name)
+    df = result.group_by("carrier_name").aggregate("total_distance").execute()
+
+    assert len(df) == 2
+    assert "carrier_name" in df.columns
+    assert set(df["carrier_name"]) == {"American", "United"}