Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2024-present MTS PJSC
# SPDX-License-Identifier: Apache-2.0
"""Add external_url and external_id for datasets

Revision ID: 947c82ba59ba
Revises: 4e119cb7481e
Create Date: 2026-04-07 14:16:26.411705

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "947c82ba59ba"
down_revision = "4e119cb7481e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the migration: add external reference columns to ``dataset``.

    Adds ``external_id`` and then ``external_url`` to the ``dataset`` table.
    Both are string columns declared with ``nullable=True``.
    """
    # Order matters only for readability of the resulting schema:
    # ``external_id`` is created first, ``external_url`` second.
    for column_name in ("external_id", "external_url"):
        new_column = sa.Column(column_name, sa.String(), nullable=True)
        op.add_column("dataset", new_column)


def downgrade() -> None:
    """Revert the migration: remove external reference columns from ``dataset``.

    Drops ``external_url`` first and ``external_id`` second, i.e. the reverse
    of the order in which ``upgrade`` adds them.
    """
    for column_name in ("external_url", "external_id"):
        op.drop_column("dataset", column_name)
10 changes: 10 additions & 0 deletions data_rentgen/db/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ class Dataset(Base):
lazy="noload",
doc="Dataset tag values",
)
external_id: Mapped[str | None] = mapped_column(
String,
nullable=True,
doc="External ID for integration with other systems",
)
external_url: Mapped[str | None] = mapped_column(
String,
nullable=True,
doc="External link to other systems",
)

search_vector: Mapped[str] = mapped_column(
TSVECTOR,
Expand Down
2 changes: 2 additions & 0 deletions data_rentgen/server/schemas/v1/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class DatasetResponseV1(BaseModel):
id: str = Field(description="Dataset id", coerce_numbers_to_str=True)
location: LocationResponseV1 = Field(description="Corresponding Location")
name: str = Field(description="Dataset name")
external_id: str | None = Field(description="External ID for integration with other systems")
external_url: str | None = Field(description="Link to dataset in a external system")
schema: DatasetSchemaV1 | None = Field( # type: ignore[assignment]
description="Schema",
default=None,
Expand Down
4 changes: 4 additions & 0 deletions data_rentgen/server/services/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class DatasetData:
id: int
name: str
location: Location
external_id: str | None
external_url: str | None


@dataclass
Expand Down Expand Up @@ -66,6 +68,8 @@ async def paginate(
id=dataset.id,
name=dataset.name,
location=dataset.location,
external_id=dataset.external_id,
external_url=dataset.external_url,
),
tags=[
TagData(
Expand Down
4 changes: 4 additions & 0 deletions data_rentgen/server/utils/lineage_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ def _get_datasets(
id=str(dataset.id),
location=LocationResponseV1.model_validate(dataset.location),
name=dataset.name,
external_id=dataset.external_id,
external_url=dataset.external_url,
schema=schema,
)
return datasets
Expand Down Expand Up @@ -330,6 +332,8 @@ def _get_datasets_with_dataset_granularity(
id=str(dataset.id),
location=LocationResponseV1.model_validate(dataset.location),
name=dataset.name,
external_id=dataset.external_id,
external_url=dataset.external_url,
schema=schema,
)
return datasets
Expand Down
1 change: 1 addition & 0 deletions docs/changelog/next_release/432.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added optional ``external_id`` and ``external_url`` fields on datasets (database, API responses) for linking datasets to external systems.
2 changes: 2 additions & 0 deletions tests/test_server/fixtures/factories/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def dataset_factory(**kwargs):
"id": randint(0, 10000000),
"location_id": randint(0, 10000000),
"name": random_string(32),
"external_id": random_string(),
"external_url": None,
}

data.update(kwargs)
Expand Down
4 changes: 4 additions & 0 deletions tests/test_server/test_lineage/test_column_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_column_lineage(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.outputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -1482,6 +1484,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_column_lineage_f
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.outputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down
22 changes: 22 additions & 0 deletions tests/test_server/test_lineage/test_dataset_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ async def test_get_dataset_lineage_with_granularity_dataset(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -417,6 +419,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_direction(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -485,6 +489,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_depth(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -563,6 +569,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_symlinks(
if dataset.id in inputs_by_dataset_id or dataset.id in outputs_by_dataset_id
else None
),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -633,6 +641,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_until(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}
for dataset in datasets
},
Expand Down Expand Up @@ -1495,6 +1505,8 @@ async def test_get_dataset_lineage_unmergeable_schema_and_output_type(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema_to_json(response_schema, "LATEST_KNOWN"),
"external_id": dataset.external_id,
"external_url": dataset.external_url,
},
},
"jobs": jobs_to_json(jobs),
Expand Down Expand Up @@ -1685,18 +1697,24 @@ async def test_get_dataset_lineage_with_granularity_dataset_without_output_schem
"name": lineage_dataset.name,
"location": location_to_json(lineage_dataset.location),
"schema": schema_to_json(response_schema, "EXACT_MATCH"),
"external_id": lineage_dataset.external_id,
"external_url": lineage_dataset.external_url,
},
str(datasets[0].id): {
"id": str(datasets[0].id),
"name": datasets[0].name,
"location": location_to_json(datasets[0].location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": datasets[0].external_id,
"external_url": datasets[0].external_url,
},
str(datasets[2].id): {
"id": str(datasets[2].id),
"name": datasets[2].name,
"location": location_to_json(datasets[2].location),
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
"external_id": datasets[2].external_id,
"external_url": datasets[2].external_url,
},
},
"jobs": {},
Expand Down Expand Up @@ -1749,6 +1767,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_ignore_self_referenc
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": None,
"external_id": dataset.external_id,
"external_url": dataset.external_url,
},
},
"jobs": {},
Expand Down Expand Up @@ -1802,6 +1822,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_ignore_not_connected
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": None,
"external_id": dataset.external_id,
"external_url": dataset.external_url,
},
},
"jobs": {},
Expand Down
2 changes: 2 additions & 0 deletions tests/test_server/utils/convert_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ def dataset_to_json(
"name": dataset.name,
"location": location_to_json(dataset.location),
"schema": schema,
"external_id": dataset.external_id,
"external_url": dataset.external_url,
}


Expand Down
Loading