From 7e84ba84e599509102d720fd8d7e0528647d5326 Mon Sep 17 00:00:00 2001 From: nguyen Date: Wed, 29 Oct 2025 14:45:08 +0100 Subject: [PATCH 1/2] fix: Add batching for visitors data loading for Matomo to avoid too-long-url issue --- sources/matomo/__init__.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/sources/matomo/__init__.py b/sources/matomo/__init__.py index 9598e513c..5b3ab017e 100644 --- a/sources/matomo/__init__.py +++ b/sources/matomo/__init__.py @@ -221,3 +221,41 @@ def get_unique_visitors( ) for method_dict in method_data: yield method_dict + + +@dlt.transformer( + data_from=get_last_visits, + write_disposition="merge", + name="visitors", + primary_key="visitorId", +) +def get_unique_visitors_with_chunk( + visits: List[DictStrAny], + client: MatomoAPIClient, + site_id: int, + chunk_size: int = 20, +) -> Iterator[TDataItem]: + """ + Dlt transformer. Receives information about visits from get_last_visits. + This version allows batch loading for visitors data, which is to avoid too-long-URL issue + + Args: + visits (List[DictStrAny]): List of dicts containing information on last visits in the given timeframe. + client (MatomoAPIClient): Used to make calls to Matomo API. + site_id (int): Every site in Matomo has a unique id. + chunk_size (int): Number of visitor IDs to process in each batch. Defaults to 20. + + Returns: + Iterator[TDataItem]: Dict containing information about the visitor. 
+ """ + + visitor_ids = [visit["visitorId"] for visit in visits] + indexed_visitor_ids = [ + visitor_ids[i : i + chunk_size] for i in range(0, len(visitor_ids), chunk_size) + ] + for visitor_list in indexed_visitor_ids: + method_data = client.get_visitors_batch( + visitor_list=visitor_list, site_id=site_id + ) + for method_dict in method_data: + yield method_dict From 19f6321a0368c1e6ff47a4aae9143e7549b48ed4 Mon Sep 17 00:00:00 2001 From: nguyen Date: Wed, 29 Oct 2025 14:46:25 +0100 Subject: [PATCH 2/2] fix: Change default visitors loading function to the batching one --- sources/matomo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/matomo/__init__.py b/sources/matomo/__init__.py index 5b3ab017e..ca61d32b6 100644 --- a/sources/matomo/__init__.py +++ b/sources/matomo/__init__.py @@ -138,7 +138,7 @@ def matomo_visits( if get_live_event_visitors: resource_list.append( visits_data_generator - | get_unique_visitors(client=client, site_id=live_events_site_id) + | get_unique_visitors_with_chunk(client=client, site_id=live_events_site_id) ) return resource_list