diff --git a/src/cloudimagedirectory/filter/filter.py b/src/cloudimagedirectory/filter/filter.py index bef66b06..faae22b5 100644 --- a/src/cloudimagedirectory/filter/filter.py +++ b/src/cloudimagedirectory/filter/filter.py @@ -32,16 +32,16 @@ def FilterImageByLatestUpdate(latestDate: pd.Timestamp) -> Callable: ] -def FilterImageByUniqueName() -> Callable: - """Filter latest images with unique names.""" - print("filter images by unique names") - return _filter_by_unique_names +def FilterImageByUniqueReference() -> Callable: + """Filter latest images with unique references.""" + print("filter images by unique references") + return _filter_by_unique_references -def _filter_by_unique_names(data): - """Return a list of latest images with unique names.""" - # Create a dictionary of image names and latest data entries. - # The dictionary ensures uniqueness of the names and preserves +def _filter_by_unique_references(data): + """Return a list of latest images with unique references.""" + # Create a dictionary of image references and latest data entries. + # The dictionary ensures uniqueness of the references and preserves # insertion order of the data entries. unique_data = {} @@ -50,22 +50,22 @@ def _filter_by_unique_names(data): if entry.content is None: continue - # Compare the data entry with the last inserted entry of - # the same name. If the new entry is older, do nothing. - name = entry.content["name"] + # Compare the data entry with the last inserted entry with + # the same reference. If the new entry is older, do nothing. + ref = entry.filename date = entry.content["date"] - if name in unique_data: - latest_entry = unique_data[name] + if ref in unique_data: + latest_entry = unique_data[ref] latest_date = latest_entry.content["date"] if get_utc_datetime(latest_date) > get_utc_datetime(date): continue - # Add a new latest data entry for this image name. + # Add a new latest data entry for this image reference. # Reinsert the key to preserve the insertion order. - unique_data.pop(name, None) - unique_data[name] = entry + unique_data.pop(ref, None) + unique_data[ref] = entry - # Return a list of latest entries with unique image names. + # Return a list of latest entries with unique image references. return list(unique_data.values()) diff --git a/src/cloudimagedirectory/transformer.py b/src/cloudimagedirectory/transformer.py index 1fae46b2..5ede63ab 100644 --- a/src/cloudimagedirectory/transformer.py +++ b/src/cloudimagedirectory/transformer.py @@ -55,7 +55,7 @@ def run( filters = [ filter.FilterImageByFilename("test"), filter.FilterImageByFilename("beta"), - filter.FilterImageByUniqueName(), + filter.FilterImageByUniqueReference(), ] if filter_until == "default": diff --git a/tests/filter/test_filter.py b/tests/filter/test_filter.py index 76e3937f..ef0b31c3 100644 --- a/tests/filter/test_filter.py +++ b/tests/filter/test_filter.py @@ -9,23 +9,23 @@ def test_filterImageByLatestUpdate(): """Test for filtering the images from a given date.""" data = [ connection.DataEntry( - "aws/region-1/rhel-1", + "aws/region-a/rhel-1", None, ), connection.DataEntry( - "azure/region-1/rhel-1", + "azure/region-a/rhel-1", { "date": "2026-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-1", + "azure/region-a/rhel-1", { "date": "1920-01-01", }, ), connection.DataEntry( - "google/region-1/rhel-1", + "google/region-a/rhel-1", { "date": "2024-01-01", }, @@ -34,13 +34,13 @@ def test_filterImageByLatestUpdate(): results = filter.FilterImageByLatestUpdate(pd.to_datetime("2023-04-04"))(data) expected = [ connection.DataEntry( - "azure/region-1/rhel-1", + "azure/region-a/rhel-1", { "date": "2026-01-01", }, ), connection.DataEntry( - "google/region-1/rhel-1", + "google/region-a/rhel-1", { "date": "2024-01-01", }, @@ -55,74 +55,109 @@ def test_filterImageByLatestUpdate(): assert expected[1].content == results[1].content -def test_FilterImageByUniqueName(): - """Test for filtering latest images with unique names.""" +def test_FilterImageByUniqueReference(): + """Test for filtering latest images with unique references.""" data = [ connection.DataEntry( - "aws/region-1/rhel-1", + "aws/region-a/rhel-1", None, ), connection.DataEntry( - "azure/region-1/rhel-3", + "azure/region-a/rhel-3", { "name": "rhel-3", "date": "2023-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-1", + "azure/region-a/rhel-1", { "name": "rhel-1", "date": "2020-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-2", + "azure/region-a/rhel-2", { "name": "rhel-2", "date": "2022-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-3", + "azure/region-a/rhel-3", { "name": "rhel-3", "date": "2022-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-3", + "azure/region-a/rhel-3", { "name": "rhel-3", "date": "2020-01-01", }, ), connection.DataEntry( - "google/region-1/rhel-1", + "google/region-a/rhel-1", + { + "name": "rhel-1", + "date": "2024-01-01", + }, + ), + connection.DataEntry( + "google/region-b/rhel-1", + { + "name": "rhel-1", + "date": "2024-01-01", + }, + ), + connection.DataEntry( + "google/region-c/rhel-1", { "name": "rhel-1", "date": "2024-01-01", }, ), ] - results = filter.FilterImageByUniqueName()(data) + results = filter.FilterImageByUniqueReference()(data) expected = [ connection.DataEntry( - "azure/region-1/rhel-3", + "azure/region-a/rhel-3", { "name": "rhel-3", "date": "2023-01-01", }, ), connection.DataEntry( - "azure/region-1/rhel-2", + "azure/region-a/rhel-1", + { + "name": "rhel-1", + "date": "2020-01-01", + }, + ), + connection.DataEntry( + "azure/region-a/rhel-2", { "name": "rhel-2", "date": "2022-01-01", }, ), connection.DataEntry( - "google/region-1/rhel-1", + "google/region-a/rhel-1", + { + "name": "rhel-1", + "date": "2024-01-01", + }, + ), + connection.DataEntry( + "google/region-b/rhel-1", + { + "name": "rhel-1", + "date": "2024-01-01", + }, + ), + connection.DataEntry( + "google/region-c/rhel-1", { "name": "rhel-1", "date": "2024-01-01", @@ -130,10 +165,5 @@ def test_FilterImageByUniqueName(): ), ] - for r in results: - print(r.content) - - assert len(expected) == len(results) - assert expected[0].content == results[0].content - assert expected[1].content == results[1].content - assert expected[2].content == results[2].content + assert [i.filename for i in expected] == [i.filename for i in results] + assert [i.content for i in expected] == [i.content for i in results]