From 449d4c766074f81b3ad759c80c5cac5ad5e9d6ca Mon Sep 17 00:00:00 2001
From: Antoine Balliet
Date: Wed, 25 Jun 2025 12:15:58 +0200
Subject: [PATCH 01/22] chore: pipeline should fail if records increase buffer size (#62)

* chore: remove python version condition
* chore: pipeline should fail if source batch exceed full buffer size
* fix test
---
 .pre-commit-config.yaml                     |  3 ---
 bizon/destination/destination.py            |  8 ++++++++
 tests/destination/test_destination_logic.py | 13 +++++++------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 05a55ec..45b6364 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,3 @@
-default_language_version:
-  python: python3.8
-
 repos:
   - repo: https://github.com/psf/black
     rev: 24.4.2
diff --git a/bizon/destination/destination.py b/bizon/destination/destination.py
index 206b145..380dcd0 100644
--- a/bizon/destination/destination.py
+++ b/bizon/destination/destination.py
@@ -191,6 +191,14 @@ def write_or_buffer_records(
         logger.info(
             f"Buffer ripeness {round(self.buffer.ripeness / 60, 2)} min. Max ripeness {round(self.buffer.buffer_flush_timeout / 60, 2)} min."  # noqa
         )
+        logger.info(
+            f"Current records size to process: {round(df_destination_records.estimated_size(unit='b') / 1024 / 1024, 2)} Mb."
+        )
+
+        if df_destination_records.estimated_size(unit="b") > self.buffer.buffer_size:
+            raise ValueError(
+                f"Records size {round(df_destination_records.estimated_size(unit='b') / 1024 / 1024, 2)} Mb is greater than buffer size {round(self.buffer.buffer_size / 1024 / 1024, 2)} Mb. Please increase destination buffer_size or reduce batch_size from the source."
+            )
 
         # Write buffer to destination if buffer is ripe and create a new buffer for the new iteration
         if self.buffer.is_ripe:
diff --git a/tests/destination/test_destination_logic.py b/tests/destination/test_destination_logic.py
index 59a4544..d3f8d26 100644
--- a/tests/destination/test_destination_logic.py
+++ b/tests/destination/test_destination_logic.py
@@ -66,7 +66,7 @@ def test_buffer_records(logger_destination: LoggerDestination):
     assert logger_destination.buffer.df_destination_records.equals(df_destination_records)
 
 
-def test_write_or_buffer_records(logger_destination: LoggerDestination):
+def test_write_or_buffer_records_too_large(logger_destination: LoggerDestination):
 
     df_big_size = pl.DataFrame(schema=destination_record_schema)
 
@@ -87,11 +87,12 @@ def test_write_or_buffer_records(logger_destination: LoggerDestination):
     logger_destination.buffer.buffer_size = df_big_size.estimated_size(unit="b")
 
     # Write twice
-    buffer_status = logger_destination.write_or_buffer_records(
-        df_destination_records=df_big_size.vstack(df_destination_records), iteration=1
-    )
-
-    assert buffer_status == DestinationBufferStatus.RECORDS_WRITTEN_THEN_BUFFERED
+    with pytest.raises(
+        ValueError, match="Please increase destination buffer_size or reduce batch_size from the source"
+    ):
+        buffer_status = logger_destination.write_or_buffer_records(
+            df_destination_records=df_big_size.vstack(df_destination_records), iteration=1
+        )
 
 
 def test_write_last_iteration(logger_destination: LoggerDestination, sqlite_db_session):

From 81feb1296872e7d52aaad3e09acdeec90a56c41f Mon Sep 17 00:00:00 2001
From: Antoine Balliet
Date: Wed, 25 Jun 2025 12:43:09 +0200
Subject: [PATCH 02/22] chore: remove loguru logger from kafka consumer (#63)

---
 bizon/connectors/sources/kafka/src/source.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git
a/bizon/connectors/sources/kafka/src/source.py b/bizon/connectors/sources/kafka/src/source.py index d6d286a..7fdabcf 100644 --- a/bizon/connectors/sources/kafka/src/source.py +++ b/bizon/connectors/sources/kafka/src/source.py @@ -34,6 +34,7 @@ class SchemaNotFound(Exception): """Schema not found in the Schema Registry""" + pass @@ -76,7 +77,7 @@ def __init__(self, config: KafkaSourceConfig): self.config.consumer_config["bootstrap.servers"] = self.config.bootstrap_servers # Consumer instance - self.consumer = Consumer(self.config.consumer_config, logger=logger) + self.consumer = Consumer(self.config.consumer_config) self.topic_map = {topic.name: topic.destination_id for topic in self.config.topics} From 1f3cccccd2fbe7d511af7e97625cf5e0b4d5de18 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Fri, 4 Jul 2025 12:03:59 +0200 Subject: [PATCH 03/22] chore: make streaming_v2 work with clustering and orjson (#64) * chore: updated streaming v2 to make it work * chore: added retry strategy * chore: remove all use_legacy_streaming_api params * chore: remove all use_legacy_streaming_api params --- .../bigquery_streaming/src/config.py | 4 - .../bigquery_streaming_v2/src/destination.py | 180 +++++++++++++++--- .../bigquery_streaming_v2/src/proto_utils.py | 14 +- .../sources/kafka/tests/kafka_pipeline.py | 2 +- .../test_bigquery_streaming_client.py | 1 - 5 files changed, 158 insertions(+), 43 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming/src/config.py b/bizon/connectors/destinations/bigquery_streaming/src/config.py index c3ef55d..d05d25e 100644 --- a/bizon/connectors/destinations/bigquery_streaming/src/config.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/config.py @@ -45,10 +45,6 @@ class BigQueryStreamingConfigDetails(AbstractDestinationDetailsConfig): record_schemas: Optional[list[BigQueryRecordSchemaConfig]] = Field( default=None, description="Schema for the records. Required if unnest is set to true." ) - use_legacy_streaming_api: bool = Field( - default=False, - description="[DEPRECATED] Use the legacy streaming API. 
This is required for some older BigQuery versions.", - ) class BigQueryStreamingConfig(AbstractDestinationConfig): diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index b3023bd..eecc9f0 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -1,11 +1,19 @@ import os import tempfile -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from typing import List, Tuple, Type +import orjson import polars as pl -from google.api_core.exceptions import NotFound +import urllib3.exceptions +from google.api_core.exceptions import ( + Conflict, + NotFound, + RetryError, + ServerError, + ServiceUnavailable, +) from google.cloud import bigquery, bigquery_storage_v1 from google.cloud.bigquery import DatasetReference, TimePartitioning from google.cloud.bigquery_storage_v1.types import ( @@ -16,6 +24,13 @@ from google.protobuf.json_format import ParseDict from google.protobuf.message import Message from loguru import logger +from requests.exceptions import ConnectionError, SSLError, Timeout +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) from bizon.common.models import SyncMetadata from bizon.destination.destination import AbstractDestination @@ -102,6 +117,26 @@ def check_connection(self) -> bool: dataset = self.bq_client.create_dataset(dataset) return True + @retry( + retry=retry_if_exception_type( + ( + ServerError, + ServiceUnavailable, + SSLError, + ConnectionError, + Timeout, + RetryError, + urllib3.exceptions.ProtocolError, + urllib3.exceptions.SSLError, + ) + ), + wait=wait_exponential(multiplier=2, min=4, max=120), + stop=stop_after_attempt(8), + before_sleep=lambda retry_state: logger.warning( + f"Streaming append attempt {retry_state.attempt_number} failed. " + f"Retrying in {retry_state.next_action.sleep} seconds..." + ), + ) def append_rows_to_stream( self, write_client: bigquery_storage_v1.BigQueryWriteClient, @@ -119,8 +154,68 @@ def append_rows_to_stream( response = write_client.append_rows(iter([request])) return response.code().name + @retry( + retry=retry_if_exception_type( + ( + ServerError, + ServiceUnavailable, + SSLError, + ConnectionError, + Timeout, + RetryError, + urllib3.exceptions.ProtocolError, + urllib3.exceptions.SSLError, + ) + ), + wait=wait_exponential(multiplier=2, min=4, max=120), + stop=stop_after_attempt(8), + before_sleep=lambda retry_state: logger.warning( + f"Attempt {retry_state.attempt_number} failed. Retrying in {retry_state.next_action.sleep} seconds..." 
+ ), + ) + def process_streaming_batch( + self, + write_client: bigquery_storage_v1.BigQueryWriteClient, + stream_name: str, + proto_schema: ProtoSchema, + batch: dict, + ) -> Tuple[str, str]: + """Process a single batch for streaming or large rows with retry logic.""" + try: + if batch.get("stream_batch") and len(batch["stream_batch"]) > 0: + result = self.append_rows_to_stream(write_client, stream_name, proto_schema, batch["stream_batch"]) + return "streaming", result + elif batch.get("json_batch") and len(batch["json_batch"]) > 0: + # For large rows, we need to use the main client + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + schema=self.bq_client.get_table(self.table_id).schema, + ignore_unknown_values=True, + ) + load_job = self.bq_client.load_table_from_json( + batch["json_batch"], self.table_id, job_config=job_config, timeout=300 + ) + result = load_job.result() + if load_job.state != "DONE": + raise Exception(f"Failed to load rows to BigQuery: {load_job.errors}") + return "large_rows", "DONE" + return "empty", "SKIPPED" + except Exception as e: + logger.error(f"Error processing batch: {str(e)}") + raise + def safe_cast_record_values(self, row: dict): + """ + Safe cast record values to the correct type for BigQuery. + """ for col in self.record_schemas[self.destination_id]: + + # Handle dicts as strings + if col.type in ["STRING", "JSON"]: + if isinstance(row[col.name], dict) or isinstance(row[col.name], list): + row[col.name] = orjson.dumps(row[col.name]).decode("utf-8") + + # Handle timestamps if col.type in ["TIMESTAMP", "DATETIME"] and col.default_value_expression is None: if isinstance(row[col.name], int): if row[col.name] > datetime(9999, 12, 31).timestamp(): @@ -148,10 +243,7 @@ def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes: def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) -> str: - # TODO: for now no clustering keys - clustering_keys = [] - - # Create table if it doesnt exist + # Create table if it does not exist schema = self.get_bigquery_schema() table = bigquery.Table(self.table_id, schema=schema) time_partitioning = TimePartitioning( @@ -159,12 +251,24 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - ) table.time_partitioning = time_partitioning - # Override bigquery client with project's destination id - if self.destination_id: - project, dataset, table_name = self.destination_id.split(".") - self.bq_client = bigquery.Client(project=project) - - table = self.bq_client.create_table(table, exists_ok=True) + if self.clustering_keys and self.clustering_keys[self.destination_id]: + table.clustering_fields = self.clustering_keys[self.destination_id] + try: + table = self.bq_client.create_table(table) + except Conflict: + table = self.bq_client.get_table(self.table_id) + # Compare and update schema if needed + existing_fields = {field.name: field for field in table.schema} + new_fields = {field.name: field for field in self.get_bigquery_schema()} + + # Find fields that need to be added + fields_to_add = [field for name, field in new_fields.items() if name not in existing_fields] + + if fields_to_add: + logger.warning(f"Adding new fields to table schema: {[field.name for field in fields_to_add]}") + updated_schema = table.schema + fields_to_add + table.schema = updated_schema + table = self.bq_client.update_table(table, ["schema"]) # Create the stream if self.destination_id: @@ -178,12 +282,14 @@ def 
load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - stream_name = f"{parent}/_default" # Generating the protocol buffer representation of the message descriptor. - proto_schema, TableRow = get_proto_schema_and_class(schema, clustering_keys) + proto_schema, TableRow = get_proto_schema_and_class(schema) if self.config.unnest: serialized_rows = [ - self.to_protobuf_serialization(TableRowClass=TableRow, row=self.safe_cast_record_values(row)) - for row in df_destination_records["source_data"].str.json_decode(infer_schema_length=None).to_list() + self.to_protobuf_serialization( + TableRowClass=TableRow, row=self.safe_cast_record_values(orjson.loads(row)) + ) + for row in df_destination_records["source_data"].to_list() ] else: df_destination_records = df_destination_records.with_columns( @@ -207,16 +313,42 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - for row in df_destination_records.iter_rows(named=True) ] - results = [] - with ThreadPoolExecutor() as executor: - futures = [ - executor.submit(self.append_rows_to_stream, write_client, stream_name, proto_schema, batch_rows) - for batch_rows in self.batch(serialized_rows) - ] - for future in futures: - results.append(future.result()) + streaming_results = [] + large_rows_results = [] + + # Collect all batches first + batches = list(self.batch(serialized_rows)) + + # Use ThreadPoolExecutor for parallel processing + max_workers = min(len(batches), 10) # Limit to 10 concurrent threads + logger.info(f"Processing {len(batches)} batches with {max_workers} concurrent threads") + + try: + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all batch processing tasks + future_to_batch = { + executor.submit(self.process_streaming_batch, write_client, stream_name, proto_schema, batch): batch + for batch in batches + } - assert all([r == "OK" for r in results]) is True, "Failed to append rows to stream" + # Collect results as they complete + for future in as_completed(future_to_batch): + batch_type, result = future.result() + if batch_type == "streaming": + streaming_results.append(result) + if batch_type == "large_rows": + large_rows_results.append(result) + + except Exception as e: + logger.error(f"Error in multithreaded batch processing: {str(e)}, type: {type(e)}") + if isinstance(e, RetryError): + logger.error(f"Retry error details: {e.cause if hasattr(e, 'cause') else 'No cause available'}") + raise + + if len(streaming_results) > 0: + assert all([r == "OK" for r in streaming_results]) is True, "Failed to append rows to stream" + if len(large_rows_results) > 0: + assert all([r == "DONE" for r in large_rows_results]) is True, "Failed to load rows to BigQuery" def write_records(self, df_destination_records: pl.DataFrame) -> Tuple[bool, str]: self.load_to_bigquery_via_streaming(df_destination_records=df_destination_records) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py index 3810927..27c89d9 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py @@ -32,9 +32,7 @@ def map_bq_type_to_field_descriptor(bq_type: str) -> int: return type_map.get(bq_type, FieldDescriptorProto.TYPE_STRING) # Default to TYPE_STRING -def get_proto_schema_and_class( - bq_schema: List[SchemaField], clustering_keys: List[str] = None -) -> Tuple[ProtoSchema, Type[Message]]: +def 
get_proto_schema_and_class(bq_schema: List[SchemaField]) -> Tuple[ProtoSchema, Type[Message]]: """Generate a ProtoSchema and a TableRow class for unnested BigQuery schema.""" # Define the FileDescriptorProto file_descriptor_proto = FileDescriptorProto() @@ -60,16 +58,6 @@ def get_proto_schema_and_class( for col in bq_schema ] - if clustering_keys: - for key in clustering_keys: - fields.append( - { - "name": key, - "type": FieldDescriptorProto.TYPE_STRING, - "label": FieldDescriptorProto.LABEL_OPTIONAL, - } - ) - for i, field in enumerate(fields, start=1): field_descriptor = message_descriptor.field.add() field_descriptor.name = field["name"] diff --git a/bizon/connectors/sources/kafka/tests/kafka_pipeline.py b/bizon/connectors/sources/kafka/tests/kafka_pipeline.py index 8d7b0ef..ed72f88 100644 --- a/bizon/connectors/sources/kafka/tests/kafka_pipeline.py +++ b/bizon/connectors/sources/kafka/tests/kafka_pipeline.py @@ -3,5 +3,5 @@ from bizon.engine.engine import RunnerFactory if __name__ == "__main__": - runner = RunnerFactory.create_from_yaml(filepath=os.path.abspath("test-pipeline-streaming.yml")) + runner = RunnerFactory.create_from_yaml(filepath=os.path.abspath("test-pipeline-streaming-v2.yml")) runner.run() diff --git a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py index 7babb18..a210447 100644 --- a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py +++ b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py @@ -607,7 +607,6 @@ def test_streaming_unnested_records_legacy_clustering_keys(my_backend_config, sy project_id=TEST_PROJECT_ID, dataset_id=TEST_DATASET_ID, unnest=True, - use_legacy_streaming_api=True, time_partitioning={"type": "DAY", "field": "created_at"}, record_schemas=[ { From 5547544d28e83bcb9d157f4bbb6f46572afda91d Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Fri, 4 Jul 2025 12:58:48 +0200 Subject: [PATCH 04/22] feat: add custom dsm tracking (#65) * chore: add custom dsm tracking * chore: add headers for metadata propagation * chore: fix when datadog isnt used * chore: add alias, refactor dsm monitoring call * chore: fix tests --------- Co-authored-by: Antoine Balliet --- .../destinations/bigquery/src/config.py | 1 + .../bigquery_streaming/src/config.py | 1 + .../bigquery_streaming_v2/src/config.py | 1 + .../destinations/file/src/config.py | 1 + .../destinations/logger/src/config.py | 1 + bizon/destination/config.py | 4 + bizon/engine/runner/adapters/streaming.py | 5 + bizon/monitoring/datadog/monitor.py | 28 +- bizon/monitoring/monitor.py | 11 +- poetry.lock | 453 ++++++++++++++++-- pyproject.toml | 3 +- tests/destination/test_config.py | 3 + 12 files changed, 472 insertions(+), 40 deletions(-) diff --git a/bizon/connectors/destinations/bigquery/src/config.py b/bizon/connectors/destinations/bigquery/src/config.py index 442c9c3..ec4844d 100644 --- a/bizon/connectors/destinations/bigquery/src/config.py +++ b/bizon/connectors/destinations/bigquery/src/config.py @@ -123,5 +123,6 @@ class BigQueryConfigDetails(AbstractDestinationDetailsConfig): class BigQueryConfig(AbstractDestinationConfig): name: Literal[DestinationTypes.BIGQUERY] + alias: str = "bigquery" buffer_size: Optional[int] = 400 config: BigQueryConfigDetails diff --git a/bizon/connectors/destinations/bigquery_streaming/src/config.py b/bizon/connectors/destinations/bigquery_streaming/src/config.py index d05d25e..dcec92e 100644 
--- a/bizon/connectors/destinations/bigquery_streaming/src/config.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/config.py @@ -49,4 +49,5 @@ class BigQueryStreamingConfigDetails(AbstractDestinationDetailsConfig): class BigQueryStreamingConfig(AbstractDestinationConfig): name: Literal[DestinationTypes.BIGQUERY_STREAMING] + alias: str = "bigquery" config: BigQueryStreamingConfigDetails diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py index c4df81c..18e3e1c 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py @@ -49,4 +49,5 @@ class BigQueryStreamingV2ConfigDetails(AbstractDestinationDetailsConfig): class BigQueryStreamingV2Config(AbstractDestinationConfig): name: Literal[DestinationTypes.BIGQUERY_STREAMING_V2] + alias: str = "bigquery" config: BigQueryStreamingV2ConfigDetails diff --git a/bizon/connectors/destinations/file/src/config.py b/bizon/connectors/destinations/file/src/config.py index 66e7e37..74fca81 100644 --- a/bizon/connectors/destinations/file/src/config.py +++ b/bizon/connectors/destinations/file/src/config.py @@ -20,4 +20,5 @@ class FileDestinationDetailsConfig(AbstractDestinationDetailsConfig): class FileDestinationConfig(AbstractDestinationConfig): name: Literal[DestinationTypes.FILE] + alias: str = "file" config: FileDestinationDetailsConfig diff --git a/bizon/connectors/destinations/logger/src/config.py b/bizon/connectors/destinations/logger/src/config.py index 5f7fca6..988ce67 100644 --- a/bizon/connectors/destinations/logger/src/config.py +++ b/bizon/connectors/destinations/logger/src/config.py @@ -15,4 +15,5 @@ class LoggerDestinationConfig(AbstractDestinationDetailsConfig): class LoggerConfig(AbstractDestinationConfig): name: Literal[DestinationTypes.LOGGER] + alias: str = "logger" config: LoggerDestinationConfig diff --git a/bizon/destination/config.py b/bizon/destination/config.py index f3fd003..7fff5ac 100644 --- a/bizon/destination/config.py +++ b/bizon/destination/config.py @@ -71,4 +71,8 @@ class AbstractDestinationConfig(BaseModel): model_config = ConfigDict(extra="forbid") name: DestinationTypes = Field(..., description="Name of the destination") + alias: str = Field( + ..., + description="Alias of the destination, used for tracking the system name (ie bigquery for bigquery_streaming)", + ) config: AbstractDestinationDetailsConfig = Field(..., description="Configuration for the destination") diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index 73bff9c..7e03e3a 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -77,6 +77,8 @@ def run(self) -> RunnerStatus: for destination_id, records in destination_id_indexed_records.items(): df_source_records = StreamingRunner.convert_source_records(records) + dsm_headers = monitor.track_source_iteration(record=records[0]) + # Apply transformation df_source_records = transform.apply_transforms(df_source_records=df_source_records) @@ -92,8 +94,11 @@ def run(self) -> RunnerStatus: ) monitor.track_records_synced( num_records=len(df_destination_records), + destination_id=destination_id, extra_tags={"destination_id": destination_id}, + headers=dsm_headers, ) + if os.getenv("ENVIRONMENT") == "production": source.commit() diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 
2365080..f867a43 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -1,5 +1,5 @@ import os -from typing import Dict +from typing import Dict, Union from datadog import initialize, statsd from loguru import logger @@ -7,6 +7,7 @@ from bizon.common.models import BizonConfig from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.monitoring.monitor import AbstractMonitor +from bizon.source.models import SourceRecord class DatadogMonitor(AbstractMonitor): @@ -55,7 +56,9 @@ def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tag + [f"{key}:{value}" for key, value in extra_tags.items()], ) - def track_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None: + def track_records_synced( + self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {} + ) -> None: """ Track the number of records synced in the pipeline. @@ -67,3 +70,24 @@ def track_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {} value=num_records, tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()], ) + if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": + from ddtrace.data_streams import set_produce_checkpoint + + destination_type = self.pipeline_config.destination.alias + + set_produce_checkpoint(destination_type, destination_id, headers.setdefault) + + def track_source_iteration(self, record: SourceRecord) -> Union[Dict[str, str], None]: + """ + Track the number of records consumed from a Kafka topic. + + Args: + kafka_topic (str): The Kafka topic name + """ + + if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": + from ddtrace.data_streams import set_consume_checkpoint + + headers = {} + set_consume_checkpoint("kafka", record.data["topic"], headers.get) + return headers diff --git a/bizon/monitoring/monitor.py b/bizon/monitoring/monitor.py index c0573ef..1104fc7 100644 --- a/bizon/monitoring/monitor.py +++ b/bizon/monitoring/monitor.py @@ -4,6 +4,7 @@ from bizon.common.models import BizonConfig from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.monitoring.config import MonitorType +from bizon.source.models import SourceRecord class AbstractMonitor(ABC): @@ -21,7 +22,15 @@ def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tag """ pass - def track_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None: + def track_source_iteration(self, record: SourceRecord, headers: Dict[str, str] = {}) -> None: + """ + Run a process that tracks the source iteration. + """ + pass + + def track_records_synced( + self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {} + ) -> None: """ Track the number of records synced in the pipeline. """ diff --git a/poetry.lock b/poetry.lock index 7fc2528..1c2f35c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
[poetry.lock diff truncated: lock file regenerated with Poetry 2.1.3; adds ddtrace 3.10.0, bytecode 0.16.2, and envier 0.6.1 for the datadog extra, and annotates existing entries with dependency groups and environment markers.]
+1170,7 @@ version = "2.6.6" description = "File identification library for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "identify-2.6.6-py2.py3-none-any.whl", hash = "sha256:cbd1810bce79f8b671ecb20f53ee0ae8e86ae84b557de31d89709dc2a48ba881"}, {file = "identify-2.6.6.tar.gz", hash = "sha256:7bec12768ed44ea4761efb47806f0a41f86e7c0a5fdf5950d4648c90eca7e251"}, @@ -1010,6 +1185,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "test"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1022,23 +1198,25 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2 name = "importlib-metadata" version = "8.6.1" description = "Read metadata from Python packages" -optional = false +optional = true python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, ] +markers = {main = "extra == \"datadog\"", dev = "python_version == \"3.9\""} [package.dependencies] zipp = ">=3.20" [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] perf = ["ipython"] -test = ["flufl.flake8", "importlib_resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] type = ["pytest-mypy"] [[package]] @@ -1047,6 +1225,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["test"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -1058,6 +1237,7 @@ version = "6.29.5" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"}, {file = "ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215"}, @@ -1091,6 +1271,7 @@ version = "8.18.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, @@ -1128,6 +1309,7 @@ version = "0.19.2" description = "An autocompletion tool for 
Python that can be used for text editors." optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, @@ -1147,6 +1329,7 @@ version = "8.6.3" description = "Jupyter protocol implementation and client libraries" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"}, {file = "jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419"}, @@ -1162,7 +1345,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest (<8.2.0)", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest (<8.2.0)", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-core" @@ -1170,6 +1353,7 @@ version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, {file = "jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9"}, @@ -1190,6 +1374,8 @@ version = "2.0.2" description = "Pure Python client for Apache Kafka" optional = true python-versions = "*" +groups = ["main"] +markers = "extra == \"kafka\"" files = [ {file = "kafka-python-2.0.2.tar.gz", hash = "sha256:04dfe7fea2b63726cd6f3e79a2d86e709d608d74406638c5da33a01d45a9d7e3"}, {file = "kafka_python-2.0.2-py2.py3-none-any.whl", hash = "sha256:2d92418c7cb1c298fa6c7f0fb3519b520d0d7526ac6cb7ae2a4fc65a51a94b6e"}, @@ -1204,6 +1390,7 @@ version = "0.7.3" description = "Python logging made (stupidly) simple" optional = false python-versions = "<4.0,>=3.5" +groups = ["main"] files = [ {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, @@ -1214,7 +1401,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] +dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama 
(==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] [[package]] name = "matplotlib-inline" @@ -1222,6 +1409,7 @@ version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, @@ -1236,6 +1424,7 @@ version = "1.6.0" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, @@ -1247,6 +1436,7 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -1258,6 +1448,7 @@ version = "2.0.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -1312,6 +1503,8 @@ version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"gsheets\"" files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, @@ -1322,12 +1515,30 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] 
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "opentelemetry-api" +version = "1.34.1" +description = "OpenTelemetry Python API" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"datadog\"" +files = [ + {file = "opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c"}, + {file = "opentelemetry_api-1.34.1.tar.gz", hash = "sha256:64f0bd06d42824843731d05beea88d4d4b6ae59f9fe347ff7dfa2cc14233bbb3"}, +] + +[package.dependencies] +importlib-metadata = ">=6.0,<8.8.0" +typing-extensions = ">=4.5.0" + [[package]] name = "orjson" version = "3.10.16" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "orjson-3.10.16-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:4cb473b8e79154fa778fb56d2d73763d977be3dcc140587e07dbc545bbfc38f8"}, {file = "orjson-3.10.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:622a8e85eeec1948690409a19ca1c7d9fd8ff116f4861d261e6ae2094fe59a00"}, @@ -1405,10 +1616,12 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "test"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +markers = {main = "extra == \"bigquery\""} [[package]] name = "parso" @@ -1416,6 +1629,7 @@ version = "0.8.4" description = "A Python Parser" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, @@ -1431,6 +1645,7 @@ version = "3.0.0" description = "Python datetimes made easy" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, @@ -1522,7 +1737,7 @@ python-dateutil = ">=2.6" tzdata = ">=2020.1" [package.extras] -test = ["time-machine (>=2.6.0)"] +test = ["time-machine (>=2.6.0) ; implementation_name != \"pypy\""] [[package]] name = "pexpect" @@ -1530,6 +1745,8 @@ version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." 
optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform != \"win32\"" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, @@ -1544,6 +1761,8 @@ version = "1.3.2" description = "Pika Python AMQP Client Library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"rabbitmq\"" files = [ {file = "pika-1.3.2-py3-none-any.whl", hash = "sha256:0779a7c1fafd805672796085560d290213a465e4f6f76a6fb19e378d8041a14f"}, {file = "pika-1.3.2.tar.gz", hash = "sha256:b2a327ddddf8570b4965b3576ac77091b850262d34ce8c1d8cb4e4146aa4145f"}, @@ -1560,6 +1779,7 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -1576,6 +1796,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["test"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -1591,6 +1812,7 @@ version = "1.20.0" description = "Blazingly fast DataFrame library" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "polars-1.20.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9a313e10ea80b99a0d32bfb942b2260b9658155287b0c2ac5876323acaff4f2c"}, {file = "polars-1.20.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4474bd004376599f7e4906bd350026cbbe805ba604121090578a97f63da15381"}, @@ -1623,7 +1845,7 @@ pyarrow = ["pyarrow (>=7.0.0)"] pydantic = ["pydantic"] sqlalchemy = ["polars[pandas]", "sqlalchemy"] style = ["great-tables (>=0.8.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; platform_system == \"Windows\""] xlsx2csv = ["xlsx2csv (>=0.8.0)"] xlsxwriter = ["xlsxwriter"] @@ -1633,6 +1855,7 @@ version = "3.8.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, @@ -1651,6 +1874,7 @@ version = "3.0.50" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.8.0" +groups = ["dev"] files = [ {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, @@ -1665,6 +1889,7 @@ version = "1.25.0" description = "Beautiful, Pythonic protocol buffers." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"}, {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"}, @@ -1682,6 +1907,7 @@ version = "4.25.5" description = "" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "protobuf-4.25.5-cp310-abi3-win32.whl", hash = "sha256:5e61fd921603f58d2f5acb2806a929b4675f8874ff5f330b7d6f7e2e784bbcd8"}, {file = "protobuf-4.25.5-cp310-abi3-win_amd64.whl", hash = "sha256:4be0571adcbe712b282a330c6e89eae24281344429ae95c6d85e79e84780f5ea"}, @@ -1702,6 +1928,7 @@ version = "6.1.1" description = "Cross-platform lib for process and system monitoring in Python." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["dev"] files = [ {file = "psutil-6.1.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:9ccc4316f24409159897799b83004cb1e24f9819b0dcf9c0b68bdcb6cefee6a8"}, {file = "psutil-6.1.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ca9609c77ea3b8481ab005da74ed894035936223422dc591d6772b147421f777"}, @@ -1732,6 +1959,8 @@ version = "2.9.10" description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"postgres\"" files = [ {file = "psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2"}, {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f"}, @@ -1808,6 +2037,8 @@ version = "0.7.0" description = "Run a subprocess in a pseudo terminal" optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform != \"win32\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -1819,6 +2050,7 @@ version = "0.2.3" description = "Safely evaluate AST nodes without side effects" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, @@ -1833,6 +2065,7 @@ version = "16.1.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, @@ -1881,6 +2114,7 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -1892,6 +2126,7 @@ version = "0.4.1" description = "A collection of 
ASN.1-based protocols modules" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, @@ -1906,6 +2141,8 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "implementation_name == \"pypy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -1917,6 +2154,7 @@ version = "2.10.5" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic-2.10.5-py3-none-any.whl", hash = "sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"}, {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"}, @@ -1929,7 +2167,7 @@ typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] [[package]] name = "pydantic-core" @@ -1937,6 +2175,7 @@ version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, @@ -2049,6 +2288,7 @@ version = "2.10.2" description = "Extra Pydantic types." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_extra_types-2.10.2-py3-none-any.whl", hash = "sha256:9eccd55a2b7935cea25f0a67f6ff763d55d80c41d86b887d88915412ccf5b7fa"}, {file = "pydantic_extra_types-2.10.2.tar.gz", hash = "sha256:934d59ab7a02ff788759c3a97bc896f5cfdc91e62e4f88ea4669067a73f14b98"}, @@ -2059,11 +2299,11 @@ pydantic = ">=2.5.2" typing-extensions = "*" [package.extras] -all = ["pendulum (>=3.0.0,<4.0.0)", "phonenumbers (>=8,<9)", "pycountry (>=23)", "python-ulid (>=1,<2)", "python-ulid (>=1,<4)", "pytz (>=2024.1)", "semver (>=3.0.2)", "semver (>=3.0.2,<3.1.0)", "tzdata (>=2024.1)"] +all = ["pendulum (>=3.0.0,<4.0.0)", "phonenumbers (>=8,<9)", "pycountry (>=23)", "python-ulid (>=1,<2) ; python_version < \"3.9\"", "python-ulid (>=1,<4) ; python_version >= \"3.9\"", "pytz (>=2024.1)", "semver (>=3.0.2)", "semver (>=3.0.2,<3.1.0)", "tzdata (>=2024.1)"] pendulum = ["pendulum (>=3.0.0,<4.0.0)"] phonenumbers = ["phonenumbers (>=8,<9)"] pycountry = ["pycountry (>=23)"] -python-ulid = ["python-ulid (>=1,<2)", "python-ulid (>=1,<4)"] +python-ulid = ["python-ulid (>=1,<2) ; python_version < \"3.9\"", "python-ulid (>=1,<4) ; python_version >= \"3.9\""] semver = ["semver (>=3.0.2)"] [[package]] @@ -2072,6 +2312,7 @@ version = "2.19.1" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, @@ -2086,6 +2327,7 @@ version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["test"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, @@ -2108,6 +2350,7 @@ version = "0.6.3" description = "It helps to use fixtures in pytest.mark.parametrize" optional = false python-versions = "*" +groups = ["test"] files = [ {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, @@ -2122,6 +2365,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev", "test"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2136,6 +2380,7 @@ version = "1.0.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["main", "test"] files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -2150,6 +2395,7 @@ version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, @@ -2161,6 +2407,8 @@ version = "308" description = "Python for Window Extensions" optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\"" files = [ {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, @@ -2188,6 +2436,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2250,6 +2499,7 @@ version = "26.2.0" description = "Python 
bindings for 0MQ" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"}, {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b"}, @@ -2371,6 +2621,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main", "test"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2392,6 +2643,7 @@ version = "1.12.1" description = "Mock out responses from the requests package" optional = false python-versions = ">=3.5" +groups = ["test"] files = [ {file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"}, {file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"}, @@ -2409,6 +2661,8 @@ version = "2.0.0" description = "OAuthlib authentication support for Requests." optional = true python-versions = ">=3.4" +groups = ["main"] +markers = "extra == \"gsheets\"" files = [ {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, @@ -2427,6 +2681,7 @@ version = "4.9" description = "Pure-Python RSA implementation" optional = false python-versions = ">=3.6,<4" +groups = ["main"] files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, @@ -2441,6 +2696,7 @@ version = "3.20.1" description = "Simple, fast, extensible JSON encoder/decoder for Python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.5" +groups = ["main"] files = [ {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f5272b5866b259fe6c33c4a8c5073bf8b359c3c97b70c298a2f09a69b52c7c41"}, {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5c0de368f3052a59a1acf21f8b2dd28686a9e4eba2da7efae7ed9554cb31e7bc"}, @@ -2560,6 +2816,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev", "test"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -2571,6 +2828,7 @@ version = "2.2.2" description = "A web-based viewer for Python profiler output" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "snakeviz-2.2.2-py3-none-any.whl", hash = "sha256:77e7b9c82f6152edc330040319b97612351cd9b48c706434c535c2df31d10ac5"}, {file = "snakeviz-2.2.2.tar.gz", hash = "sha256:08028c6f8e34a032ff14757a38424770abb8662fb2818985aeea0d9bc13a7d83"}, @@ -2585,6 +2843,7 @@ version = "2.0.37" description = "Database Abstraction Library" optional = false python-versions = 
">=3.7" +groups = ["main"] files = [ {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da36c3b0e891808a7542c5c89f224520b9a16c7f5e4d6a1156955605e54aef0e"}, {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7402ff96e2b073a98ef6d6142796426d705addd27b9d26c3b32dbaa06d7d069"}, @@ -2680,6 +2939,8 @@ version = "1.12.1" description = "SQLAlchemy dialect for BigQuery" optional = true python-versions = "<3.13,>=3.8" +groups = ["main"] +markers = "extra == \"bigquery\"" files = [ {file = "sqlalchemy_bigquery-1.12.1-py2.py3-none-any.whl", hash = "sha256:93ae66d94405c457ae111ad4ef170b152e5b3b659ae00af21c1326bbeb253b20"}, {file = "sqlalchemy_bigquery-1.12.1.tar.gz", hash = "sha256:f73165c40b4767ca2025b2c759b7619ef47b07fcd3bd8eb0246f7498bbba7cef"}, @@ -2694,8 +2955,8 @@ sqlalchemy = ">=1.4.16,<3.0.0dev" [package.extras] alembic = ["alembic"] -all = ["GeoAlchemy2", "alembic", "google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "packaging", "pyarrow (>=3.0.0)", "pytz", "shapely"] -bqstorage = ["google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] +all = ["GeoAlchemy2", "alembic", "google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "packaging", "pyarrow (>=3.0.0)", "pytz", "shapely"] +bqstorage = ["google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "pyarrow (>=3.0.0)"] geography = ["GeoAlchemy2", "shapely"] tests = ["packaging", "pytz"] @@ -2705,6 +2966,7 @@ version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, @@ -2724,6 +2986,7 @@ version = "9.0.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, @@ -2739,6 +3002,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -2780,6 +3045,7 @@ version = "6.4.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1"}, {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803"}, @@ -2800,6 +3066,7 @@ version = "5.14.3" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, @@ -2815,10 +3082,12 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +markers = {dev = "python_version == \"3.9\""} [[package]] name = "tzdata" @@ -2826,6 +3095,7 @@ version = "2025.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] files = [ {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, @@ -2837,13 +3107,14 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" +groups = ["main", "test"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -2854,6 +3125,7 @@ version = "20.29.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "virtualenv-20.29.1-py3-none-any.whl", hash = "sha256:4e4cb403c0b0da39e13b46b1b2476e505cb0046b25f242bee80f62bf990b2779"}, {file = "virtualenv-20.29.1.tar.gz", hash = "sha256:b8b8970138d32fb606192cb97f6cd4bb644fa486be9308fb9b63f81091b5dc35"}, @@ -2866,7 +3138,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [[package]] name = "wcwidth" @@ -2874,6 +3146,7 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -2885,13 +3158,118 @@ version = "1.2.0" description = "A small Python utility to set file creation time on Windows" optional = false python-versions = ">=3.5" +groups = ["main"] +markers = "sys_platform == \"win32\"" files = [ {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"}, {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"}, ] [package.extras] -dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] + +[[package]] +name = "wrapt" +version = "1.17.2" +description = "Module for decorators, wrappers and monkey patching." 
+optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"datadog\"" +files = [ + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, + {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, + {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, + {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, + {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, + {file = 
"wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, + {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, + {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, + {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, + {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = 
"sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, + {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, + {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, + {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, + {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash 
= "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, + {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, + {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, + {file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, + {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, +] + +[[package]] +name = "xmltodict" +version = "0.14.2" +description = "Makes working with XML feel like you are working with JSON" +optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"datadog\"" +files = [ + {file = "xmltodict-0.14.2-py2.py3-none-any.whl", hash = "sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac"}, + {file = "xmltodict-0.14.2.tar.gz", hash = "sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553"}, +] [[package]] name = "yappi" @@ -2899,6 +3277,7 @@ version = "1.6.10" description = "Yet Another Python Profiler" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "yappi-1.6.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f03127742746ec4cf7e422b08212daf094505ab7f5d725d7b273ed3c475c3d9"}, {file = "yappi-1.6.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bbafb779c3f90edd09fd34733859226785618adee3179d5949dbba2e90f550a"}, @@ -2966,30 +3345,32 @@ test = ["gevent (>=20.6.2)"] name = "zipp" version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false +optional = true python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, ] +markers = {main = "extra == \"datadog\"", dev = "python_version == \"3.9\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging 
(>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [extras] bigquery = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "protobuf", "sqlalchemy-bigquery"] -datadog = ["datadog"] +datadog = ["datadog", "ddtrace"] gsheets = ["gspread"] kafka = ["avro", "confluent-kafka", "fastavro", "kafka-python"] postgres = ["psycopg2-binary"] rabbitmq = ["pika"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.9,<3.13" -content-hash = "513d745ca8fec848290fc404075c0a3e5b6be54e6ca0edbfd0976c448e69ae8e" +content-hash = "0ff1aca4778eadd00ed0f554c62810ce45d0ac984af2028c2f210374cd8da443" diff --git a/pyproject.toml b/pyproject.toml index 497adc6..135c616 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ gspread = { version = "^6.1.2", optional = true } # Monitoring datadog = { version = "^0.50.2", optional = true } +ddtrace = { version = "^3.10.0", optional = true } [tool.poetry.extras] postgres = ["psycopg2-binary"] @@ -57,7 +58,7 @@ bigquery = ["google-cloud-bigquery-storage", "google-cloud-bigquery", "sqlalchem kafka = ["confluent-kafka", "fastavro", "kafka-python", "avro"] rabbitmq = ["pika"] gsheets = ["gspread"] -datadog = ["datadog"] +datadog = ["datadog", "ddtrace"] ###### [Test] dependencies ###### [tool.poetry.group.test] diff --git a/tests/destination/test_config.py b/tests/destination/test_config.py index d437069..78f2014 100644 --- a/tests/destination/test_config.py +++ b/tests/destination/test_config.py @@ -11,6 +11,7 @@ def test_config(): config = AbstractDestinationConfig( name="file", + alias="file", config=AbstractDestinationDetailsConfig( unnest=False, ), @@ -23,6 +24,7 @@ def test_config_no_record_schema_provided(): with pytest.raises(ValidationError) as e: AbstractDestinationConfig( name="file", + alias="file", config=AbstractDestinationDetailsConfig( unnest=True, ), @@ -32,6 +34,7 @@ def test_config_no_record_schema_provided(): def test_config_with_unnest_provided_schema(): config = AbstractDestinationConfig( name="file", + alias="file", config=AbstractDestinationDetailsConfig( unnest=True, record_schemas=[ From 6ce5398d623a1dccaf62164d9692703eeeccecfa Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Fri, 4 Jul 2025 17:37:00 +0200 Subject: [PATCH 05/22] chore: DSM working in staging (#67) * chore: add return for DSM tracking * chore: add logs --- bizon/engine/runner/adapters/streaming.py | 4 +++- bizon/monitoring/datadog/monitor.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index 7e03e3a..79facf7 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -78,6 +78,7 @@ def run(self) -> RunnerStatus: df_source_records = StreamingRunner.convert_source_records(records) dsm_headers = monitor.track_source_iteration(record=records[0]) + logger.info(f"DSM headers: {dsm_headers}") # Apply transformation df_source_records = transform.apply_transforms(df_source_records=df_source_records) @@ -92,12 +93,13 @@ def run(self) -> RunnerStatus: 
iteration=iteration, pagination=None, ) - monitor.track_records_synced( + last_dsm_headers = monitor.track_records_synced( num_records=len(df_destination_records), destination_id=destination_id, extra_tags={"destination_id": destination_id}, headers=dsm_headers, ) + logger.info(f"Last DSM headers: {last_dsm_headers}") if os.getenv("ENVIRONMENT") == "production": source.commit() diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index f867a43..03edc62 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -58,7 +58,7 @@ def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tag def track_records_synced( self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {} - ) -> None: + ) -> Union[Dict[str, str], None]: """ Track the number of records synced in the pipeline. @@ -76,6 +76,7 @@ def track_records_synced( destination_type = self.pipeline_config.destination.alias set_produce_checkpoint(destination_type, destination_id, headers.setdefault) + return headers def track_source_iteration(self, record: SourceRecord) -> Union[Dict[str, str], None]: """ From a6fb4a3034df0996d2be5396d171104d5550d68d Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Mon, 7 Jul 2025 11:49:29 +0200 Subject: [PATCH 06/22] chore: loop through dsm call (#68) * chore: loop through dsm trace * chore: typing * chore: typing --- bizon/engine/runner/adapters/streaming.py | 6 +++--- bizon/monitoring/datadog/monitor.py | 20 ++++++++++++-------- bizon/monitoring/monitor.py | 4 ++-- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index 79facf7..d271504 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -77,8 +77,8 @@ def run(self) -> RunnerStatus: for destination_id, records in destination_id_indexed_records.items(): df_source_records = StreamingRunner.convert_source_records(records) - dsm_headers = monitor.track_source_iteration(record=records[0]) - logger.info(f"DSM headers: {dsm_headers}") + dsm_headers = monitor.track_source_iteration(records=records) + logger.info(f"DSM headers: {dsm_headers[0] if dsm_headers else None}") # Apply transformation df_source_records = transform.apply_transforms(df_source_records=df_source_records) @@ -99,7 +99,7 @@ def run(self) -> RunnerStatus: extra_tags={"destination_id": destination_id}, headers=dsm_headers, ) - logger.info(f"Last DSM headers: {last_dsm_headers}") + logger.info(f"Last DSM headers: {last_dsm_headers[0] if last_dsm_headers else None}") if os.getenv("ENVIRONMENT") == "production": source.commit() diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 03edc62..d480328 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Union +from typing import Dict, List, Union from datadog import initialize, statsd from loguru import logger @@ -57,8 +57,8 @@ def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tag ) def track_records_synced( - self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {} - ) -> Union[Dict[str, str], None]: + self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: List[Dict[str, str]] = [] + ) -> Union[List[Dict[str, str]], None]: """ 
Track the number of records synced in the pipeline. @@ -75,10 +75,11 @@ def track_records_synced( destination_type = self.pipeline_config.destination.alias - set_produce_checkpoint(destination_type, destination_id, headers.setdefault) + for header in headers: + set_produce_checkpoint(destination_type, destination_id, header.setdefault) return headers - def track_source_iteration(self, record: SourceRecord) -> Union[Dict[str, str], None]: + def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict[str, str]], None]: """ Track the number of records consumed from a Kafka topic. @@ -89,6 +90,9 @@ def track_source_iteration(self, record: SourceRecord) -> Union[Dict[str, str], if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": from ddtrace.data_streams import set_consume_checkpoint - headers = {} - set_consume_checkpoint("kafka", record.data["topic"], headers.get) - return headers + headers_list = [] + for record in records: + headers = {} + set_consume_checkpoint("kafka", record.data["topic"], headers.get) + headers_list.append(headers) + return headers_list diff --git a/bizon/monitoring/monitor.py b/bizon/monitoring/monitor.py index 1104fc7..b89076f 100644 --- a/bizon/monitoring/monitor.py +++ b/bizon/monitoring/monitor.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Dict +from typing import Dict, List from bizon.common.models import BizonConfig from bizon.engine.pipeline.models import PipelineReturnStatus @@ -22,7 +22,7 @@ def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tag """ pass - def track_source_iteration(self, record: SourceRecord, headers: Dict[str, str] = {}) -> None: + def track_source_iteration(self, records: List[SourceRecord], headers: Dict[str, str] = {}) -> None: """ Run a process that tracks the source iteration. """ From 1ca934fddf10c86a47a5326172f426e4e121767c Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Mon, 7 Jul 2025 12:48:00 +0200 Subject: [PATCH 07/22] chore: remove offsets skipping in case msg too large and log consumer config (#69) * chore: stop skipping message too large * chore: log consumer config * fix error message --- bizon/connectors/sources/kafka/src/source.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bizon/connectors/sources/kafka/src/source.py b/bizon/connectors/sources/kafka/src/source.py index 7fdabcf..e1a7540 100644 --- a/bizon/connectors/sources/kafka/src/source.py +++ b/bizon/connectors/sources/kafka/src/source.py @@ -107,6 +107,13 @@ def check_connection(self) -> Tuple[bool | Any | None]: config_topics = [topic.name for topic in self.config.topics] + # Display consumer config + # We ignore the key sasl.password and sasl.username + consumer_config = self.config.consumer_config.copy() + consumer_config.pop("sasl.password", None) + consumer_config.pop("sasl.username", None) + logger.info(f"Consumer config: {consumer_config}") + for topic in config_topics: if topic not in topics: logger.error(f"Topic {topic} not found, available topics: {topics.keys()}") @@ -237,13 +244,12 @@ def parse_encoded_messages(self, encoded_messages: list) -> List[SourceRecord]: if message.error(): # If the message is too large, we skip it and update the offset if message.error().code() == KafkaError.MSG_SIZE_TOO_LARGE: - logger.warning( + logger.error( ( - f"Message for topic {message.topic()} partition {message.partition()} and offset {message.offset()} has been skipped. " - f"Raised MSG_SIZE_TOO_LARGE, we suppose the message does not exist. 
Double-check in Confluent Cloud." + f"Message for topic {message.topic()} partition {message.partition()} and offset {message.offset()} is too large. " + f"Raised MSG_SIZE_TOO_LARGE, if manually setting the offset, the message might not exist. Double-check in Confluent Cloud." ) ) - continue logger.error( ( From fec09573f378e234f4d95fba2e0dd6834316b951 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Mon, 7 Jul 2025 16:13:08 +0200 Subject: [PATCH 08/22] chore: remove return (#70) --- .../destinations/bigquery_streaming/src/destination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bizon/connectors/destinations/bigquery_streaming/src/destination.py b/bizon/connectors/destinations/bigquery_streaming/src/destination.py index bf1f7e7..62cba19 100644 --- a/bizon/connectors/destinations/bigquery_streaming/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/destination.py @@ -222,7 +222,7 @@ def _insert_batch(self, table, batch): try: # Handle streaming batch if batch.get("stream_batch") and len(batch["stream_batch"]) > 0: - return self.bq_client.insert_rows_json( + self.bq_client.insert_rows_json( table, batch["stream_batch"], row_ids=[None] * len(batch["stream_batch"]), From 87ac6593b9588c67c8efe7703f15cab3a67e3088 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Mon, 7 Jul 2025 18:35:07 +0200 Subject: [PATCH 09/22] Fix streaming v2 for large rows (#71) * chore: change values and handle large rows in streaming v2 * chore: change values and handle large rows in streaming v2 --- .../bigquery_streaming_v2/src/destination.py | 143 +++++++++++------- 1 file changed, 86 insertions(+), 57 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index eecc9f0..edcff00 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -21,7 +21,7 @@ ProtoRows, ProtoSchema, ) -from google.protobuf.json_format import ParseDict +from google.protobuf.json_format import MessageToDict, ParseDict from google.protobuf.message import Message from loguru import logger from requests.exceptions import ConnectionError, SSLError, Timeout @@ -44,9 +44,9 @@ class BigQueryStreamingV2Destination(AbstractDestination): # Add constants for limits - MAX_ROWS_PER_REQUEST = 5000 # 5000 (max is 10000) - MAX_REQUEST_SIZE_BYTES = 5 * 1024 * 1024 # 5 MB (max is 10MB) - MAX_ROW_SIZE_BYTES = 0.9 * 1024 * 1024 # 1 MB + MAX_ROWS_PER_REQUEST = 6000 # 8000 (max is 10000) + MAX_REQUEST_SIZE_BYTES = 8 * 1024 * 1024 # 8 MB (max is 10MB) + MAX_ROW_SIZE_BYTES = 3 * 1024 * 1024 # 3 MB (max is 10MB) def __init__( self, @@ -154,6 +154,53 @@ def append_rows_to_stream( response = write_client.append_rows(iter([request])) return response.code().name + def safe_cast_record_values(self, row: dict): + """ + Safe cast record values to the correct type for BigQuery. 
+ """ + for col in self.record_schemas[self.destination_id]: + + # Handle dicts as strings + if col.type in ["STRING", "JSON"]: + if isinstance(row[col.name], dict) or isinstance(row[col.name], list): + row[col.name] = orjson.dumps(row[col.name]).decode("utf-8") + + # Handle timestamps + if col.type in ["TIMESTAMP", "DATETIME"] and col.default_value_expression is None: + if isinstance(row[col.name], int): + if row[col.name] > datetime(9999, 12, 31).timestamp(): + row[col.name] = datetime.fromtimestamp(row[col.name] / 1_000_000).strftime( + "%Y-%m-%d %H:%M:%S.%f" + ) + else: + try: + row[col.name] = datetime.fromtimestamp(row[col.name]).strftime("%Y-%m-%d %H:%M:%S.%f") + except ValueError: + error_message = ( + f"Error casting timestamp for destination '{self.destination_id}' column '{col.name}'. " + f"Invalid timestamp value: {row[col.name]} ({type(row[col.name])}). " + "Consider using a transformation." + ) + logger.error(error_message) + raise ValueError(error_message) + return row + + @staticmethod + def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes: + """Convert a row to a Protobuf serialization.""" + record = ParseDict(row, TableRowClass()) + return record.SerializeToString() + + @staticmethod + def from_protobuf_serialization( + TableRowClass: Type[Message], + serialized_data: bytes, + ) -> dict: + """Convert protobuf serialization back to a dictionary.""" + record = TableRowClass() + record.ParseFromString(serialized_data) + return MessageToDict(record, preserving_proto_field_name=True) + @retry( retry=retry_if_exception_type( ( @@ -179,13 +226,24 @@ def process_streaming_batch( stream_name: str, proto_schema: ProtoSchema, batch: dict, - ) -> Tuple[str, str]: - """Process a single batch for streaming or large rows with retry logic.""" + table_row_class: Type[Message], + ) -> List[Tuple[str, str]]: + """Process a single batch for streaming and/or large rows with retry logic.""" + results = [] try: + # Handle streaming batch if batch.get("stream_batch") and len(batch["stream_batch"]) > 0: result = self.append_rows_to_stream(write_client, stream_name, proto_schema, batch["stream_batch"]) - return "streaming", result - elif batch.get("json_batch") and len(batch["json_batch"]) > 0: + results.append(("streaming", result)) + + # Handle large rows batch + if batch.get("json_batch") and len(batch["json_batch"]) > 0: + # Deserialize protobuf bytes back to JSON for the load job + deserialized_rows = [] + for serialized_row in batch["json_batch"]: + deserialized_row = self.from_protobuf_serialization(table_row_class, serialized_row) + deserialized_rows.append(deserialized_row) + # For large rows, we need to use the main client job_config = bigquery.LoadJobConfig( source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, @@ -193,54 +251,21 @@ def process_streaming_batch( ignore_unknown_values=True, ) load_job = self.bq_client.load_table_from_json( - batch["json_batch"], self.table_id, job_config=job_config, timeout=300 + deserialized_rows, self.table_id, job_config=job_config, timeout=300 ) result = load_job.result() if load_job.state != "DONE": raise Exception(f"Failed to load rows to BigQuery: {load_job.errors}") - return "large_rows", "DONE" - return "empty", "SKIPPED" + results.append(("large_rows", "DONE")) + + if not results: + results.append(("empty", "SKIPPED")) + + return results except Exception as e: logger.error(f"Error processing batch: {str(e)}") raise - def safe_cast_record_values(self, row: dict): - """ - Safe cast record values to the correct type 
for BigQuery. - """ - for col in self.record_schemas[self.destination_id]: - - # Handle dicts as strings - if col.type in ["STRING", "JSON"]: - if isinstance(row[col.name], dict) or isinstance(row[col.name], list): - row[col.name] = orjson.dumps(row[col.name]).decode("utf-8") - - # Handle timestamps - if col.type in ["TIMESTAMP", "DATETIME"] and col.default_value_expression is None: - if isinstance(row[col.name], int): - if row[col.name] > datetime(9999, 12, 31).timestamp(): - row[col.name] = datetime.fromtimestamp(row[col.name] / 1_000_000).strftime( - "%Y-%m-%d %H:%M:%S.%f" - ) - else: - try: - row[col.name] = datetime.fromtimestamp(row[col.name]).strftime("%Y-%m-%d %H:%M:%S.%f") - except ValueError: - error_message = ( - f"Error casting timestamp for destination '{self.destination_id}' column '{col.name}'. " - f"Invalid timestamp value: {row[col.name]} ({type(row[col.name])}). " - "Consider using a transformation." - ) - logger.error(error_message) - raise ValueError(error_message) - return row - - @staticmethod - def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes: - """Convert a row to a Protobuf serialization.""" - record = ParseDict(row, TableRowClass()) - return record.SerializeToString() - def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) -> str: # Create table if it does not exist @@ -327,17 +352,20 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all batch processing tasks future_to_batch = { - executor.submit(self.process_streaming_batch, write_client, stream_name, proto_schema, batch): batch + executor.submit( + self.process_streaming_batch, write_client, stream_name, proto_schema, batch, TableRow + ): batch for batch in batches } # Collect results as they complete for future in as_completed(future_to_batch): - batch_type, result = future.result() - if batch_type == "streaming": - streaming_results.append(result) - if batch_type == "large_rows": - large_rows_results.append(result) + batch_results = future.result() + for batch_type, result in batch_results: + if batch_type == "streaming": + streaming_results.append(result) + if batch_type == "large_rows": + large_rows_results.append(result) except Exception as e: logger.error(f"Error in multithreaded batch processing: {str(e)}, type: {type(e)}") @@ -379,15 +407,16 @@ def batch(self, iterable): if item_size > self.MAX_ROW_SIZE_BYTES: large_rows.append(item) - logger.debug(f"Large row detected: {item_size} bytes") + logger.warning(f"Large row detected: {item_size} bytes") else: current_batch.append(item) current_batch_size += item_size # Yield the last batch if current_batch: - logger.debug( + logger.info( f"Yielding streaming batch of {len(current_batch)} rows, size: {current_batch_size/1024/1024:.2f}MB" ) - logger.debug(f"Yielding large rows batch of {len(large_rows)} rows") + if large_rows: + logger.warning(f"Yielding large rows batch of {len(large_rows)} rows") yield {"stream_batch": current_batch, "json_batch": large_rows} From 18f1734d7082d34697e961e298ced0ae9b5d72d5 Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Wed, 9 Jul 2025 14:26:07 +0200 Subject: [PATCH 10/22] feat: add monitoring for large records synced in destination (#72) * feat: add monitoring for large records synced in destination * fix import * fix * fix monitor in tests * fix test * fix * fix * fix * fix * add test for monitors * fix * fix --- .../destinations/bigquery/src/destination.py | 4 
++- .../bigquery_streaming/src/destination.py | 8 ++++- .../bigquery_streaming_v2/src/destination.py | 10 +++++- .../destinations/file/src/destination.py | 4 ++- .../destinations/logger/src/destination.py | 3 ++ bizon/destination/destination.py | 33 +++++++++++++++--- bizon/engine/runner/adapters/streaming.py | 11 ++++-- bizon/engine/runner/runner.py | 21 ++++++++---- bizon/monitoring/datadog/monitor.py | 33 +++++++++++------- bizon/monitoring/monitor.py | 28 +++++++++------ bizon/monitoring/noop/monitor.py | 7 ++-- .../bigquery/test_bigquery_client.py | 6 +++- .../bigquery/test_destination_factory.py | 16 +++++++-- .../test_bigquery_streaming_client.py | 23 +++++++++++-- tests/destination/test_destination_logic.py | 20 ++++++----- tests/engine/test_parse_yaml.py | 34 +++++++++++++++++-- tests/engine/test_producer_recovery.py | 20 ++++++----- tests/monitoring/test_monitoring_datadog.py | 33 ++++++++++++++++++ 18 files changed, 245 insertions(+), 69 deletions(-) create mode 100644 tests/monitoring/test_monitoring_datadog.py diff --git a/bizon/connectors/destinations/bigquery/src/destination.py b/bizon/connectors/destinations/bigquery/src/destination.py index 3202f15..de411f5 100644 --- a/bizon/connectors/destinations/bigquery/src/destination.py +++ b/bizon/connectors/destinations/bigquery/src/destination.py @@ -14,6 +14,7 @@ from bizon.common.models import SyncMetadata from bizon.destination.destination import AbstractDestination from bizon.engine.backend.backend import AbstractBackend +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.config import SourceSyncModes from bizon.source.source import AbstractSourceCallback @@ -28,8 +29,9 @@ def __init__( config: BigQueryConfigDetails, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): - super().__init__(sync_metadata, config, backend, source_callback) + super().__init__(sync_metadata, config, backend, source_callback, monitor) self.config: BigQueryConfigDetails = config if config.authentication and config.authentication.service_account_key: diff --git a/bizon/connectors/destinations/bigquery_streaming/src/destination.py b/bizon/connectors/destinations/bigquery_streaming/src/destination.py index 62cba19..1949944 100644 --- a/bizon/connectors/destinations/bigquery_streaming/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/destination.py @@ -36,6 +36,7 @@ ) from bizon.destination.destination import AbstractDestination from bizon.engine.backend.backend import AbstractBackend +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.callback import AbstractSourceCallback from .config import BigQueryStreamingConfigDetails @@ -54,8 +55,9 @@ def __init__( config: BigQueryStreamingConfigDetails, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): # type: ignore - super().__init__(sync_metadata, config, backend, source_callback) + super().__init__(sync_metadata, config, backend, source_callback, monitor) self.config: BigQueryStreamingConfigDetails = config if config.authentication and config.authentication.service_account_key: @@ -245,6 +247,10 @@ def _insert_batch(self, table, batch): if load_job.state != "DONE": raise Exception(f"Failed to load rows to BigQuery: {load_job.errors}") + self.monitor.track_large_records_synced( + num_records=len(batch["json_batch"]), extra_tags={"destination_id": self.destination_id} + ) + except Exception as e: logger.error(f"Error inserting batch: {str(e)}, type: 
{type(e)}") raise diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index edcff00..c7b11f1 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -35,6 +35,7 @@ from bizon.common.models import SyncMetadata from bizon.destination.destination import AbstractDestination from bizon.engine.backend.backend import AbstractBackend +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.callback import AbstractSourceCallback from .config import BigQueryStreamingV2ConfigDetails @@ -54,8 +55,9 @@ def __init__( config: BigQueryStreamingV2ConfigDetails, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): # type: ignore - super().__init__(sync_metadata, config, backend, source_callback) + super().__init__(sync_metadata, config, backend, source_callback, monitor) self.config: BigQueryStreamingV2ConfigDetails = config if config.authentication and config.authentication.service_account_key: @@ -256,6 +258,12 @@ def process_streaming_batch( result = load_job.result() if load_job.state != "DONE": raise Exception(f"Failed to load rows to BigQuery: {load_job.errors}") + + # Track large rows + self.monitor.track_large_records_synced( + num_records=len(batch["json_batch"]), extra_tags={"destination_id": self.destination_id} + ) + results.append(("large_rows", "DONE")) if not results: diff --git a/bizon/connectors/destinations/file/src/destination.py b/bizon/connectors/destinations/file/src/destination.py index 585a591..9eacc88 100644 --- a/bizon/connectors/destinations/file/src/destination.py +++ b/bizon/connectors/destinations/file/src/destination.py @@ -6,6 +6,7 @@ from bizon.common.models import SyncMetadata from bizon.destination.destination import AbstractDestination from bizon.engine.backend.backend import AbstractBackend +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.callback import AbstractSourceCallback from .config import FileDestinationDetailsConfig @@ -19,8 +20,9 @@ def __init__( config: FileDestinationDetailsConfig, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): - super().__init__(sync_metadata, config, backend, source_callback) + super().__init__(sync_metadata, config, backend, source_callback, monitor) self.config: FileDestinationDetailsConfig = config def check_connection(self) -> bool: diff --git a/bizon/connectors/destinations/logger/src/destination.py b/bizon/connectors/destinations/logger/src/destination.py index e7f36a2..039a5e7 100644 --- a/bizon/connectors/destinations/logger/src/destination.py +++ b/bizon/connectors/destinations/logger/src/destination.py @@ -6,6 +6,7 @@ from bizon.common.models import SyncMetadata from bizon.destination.destination import AbstractDestination from bizon.engine.backend.backend import AbstractBackend +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.callback import AbstractSourceCallback from .config import LoggerDestinationConfig @@ -19,12 +20,14 @@ def __init__( config: LoggerDestinationConfig, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): super().__init__( sync_metadata=sync_metadata, config=config, backend=backend, source_callback=source_callback, + monitor=monitor, ) def check_connection(self) -> bool: diff --git 
a/bizon/destination/destination.py b/bizon/destination/destination.py index 380dcd0..a9a9c5a 100644 --- a/bizon/destination/destination.py +++ b/bizon/destination/destination.py @@ -10,6 +10,7 @@ from bizon.common.models import SyncMetadata from bizon.engine.backend.backend import AbstractBackend from bizon.engine.backend.models import JobStatus +from bizon.monitoring.monitor import AbstractMonitor from bizon.source.callback import AbstractSourceCallback from bizon.source.config import SourceSyncModes @@ -50,6 +51,7 @@ def __init__( config: AbstractDestinationDetailsConfig, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ): self.sync_metadata = sync_metadata self.config = config @@ -284,6 +286,7 @@ def get_destination( config: AbstractDestinationConfig, backend: AbstractBackend, source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ) -> AbstractDestination: if config.name == DestinationTypes.LOGGER: @@ -292,7 +295,11 @@ def get_destination( ) return LoggerDestination( - sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback + sync_metadata=sync_metadata, + config=config.config, + backend=backend, + source_callback=source_callback, + monitor=monitor, ) elif config.name == DestinationTypes.BIGQUERY: @@ -301,7 +308,11 @@ def get_destination( ) return BigQueryDestination( - sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback + sync_metadata=sync_metadata, + config=config.config, + backend=backend, + source_callback=source_callback, + monitor=monitor, ) elif config.name == DestinationTypes.BIGQUERY_STREAMING: @@ -310,7 +321,11 @@ def get_destination( ) return BigQueryStreamingDestination( - sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback + sync_metadata=sync_metadata, + config=config.config, + backend=backend, + source_callback=source_callback, + monitor=monitor, ) elif config.name == DestinationTypes.BIGQUERY_STREAMING_V2: @@ -319,7 +334,11 @@ def get_destination( ) return BigQueryStreamingV2Destination( - sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback + sync_metadata=sync_metadata, + config=config.config, + backend=backend, + source_callback=source_callback, + monitor=monitor, ) elif config.name == DestinationTypes.FILE: @@ -328,7 +347,11 @@ def get_destination( ) return FileDestination( - sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback + sync_metadata=sync_metadata, + config=config.config, + backend=backend, + source_callback=source_callback, + monitor=monitor, ) raise ValueError(f"Destination {config.name}" f"with params {config} not found") diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index d271504..f154480 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -8,7 +8,7 @@ from loguru import logger from pytz import UTC -from bizon.common.models import BizonConfig +from bizon.common.models import BizonConfig, SyncMetadata from bizon.destination.models import transform_to_df_destination_records from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.engine.runner.config import RunnerStatus @@ -40,14 +40,20 @@ def run(self) -> RunnerStatus: job = self.init_job(bizon_config=self.bizon_config, config=self.config) backend = 
self.get_backend(bizon_config=self.bizon_config) source = self.get_source(bizon_config=self.bizon_config, config=self.config) + + sync_metadata = SyncMetadata.from_bizon_config(job_id=job.id, config=self.bizon_config) + monitor = self.get_monitoring_client(sync_metadata=sync_metadata, bizon_config=self.bizon_config) + destination = self.get_destination( bizon_config=self.bizon_config, backend=backend, job_id=job.id, source_callback=None, + monitor=monitor, ) + transform = self.get_transform(bizon_config=self.bizon_config) - monitor = self.get_monitoring_client(bizon_config=self.bizon_config) + destination.buffer.buffer_size = 0 # force buffer to be flushed immediately iteration = 0 @@ -107,4 +113,5 @@ def run(self) -> RunnerStatus: iteration += 1 monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS) + return RunnerStatus(stream=PipelineReturnStatus.SUCCESS) # return when max iterations is reached diff --git a/bizon/engine/runner/runner.py b/bizon/engine/runner/runner.py index 3c09f41..2e8a81c 100644 --- a/bizon/engine/runner/runner.py +++ b/bizon/engine/runner/runner.py @@ -82,7 +82,11 @@ def get_source(bizon_config: BizonConfig, config: dict) -> AbstractSource: @staticmethod def get_destination( - bizon_config: BizonConfig, backend: AbstractBackend, job_id: str, source_callback: AbstractSourceCallback + bizon_config: BizonConfig, + backend: AbstractBackend, + job_id: str, + source_callback: AbstractSourceCallback, + monitor: AbstractMonitor, ) -> AbstractDestination: """Get an instance of the destination based on the destination config dict""" @@ -93,6 +97,7 @@ def get_destination( config=bizon_config.destination, backend=backend, source_callback=source_callback, + monitor=monitor, ) @staticmethod @@ -124,9 +129,9 @@ def get_transform(bizon_config: BizonConfig) -> Transform: return Transform(transforms=bizon_config.transforms) @staticmethod - def get_monitoring_client(bizon_config: BizonConfig) -> AbstractMonitor: + def get_monitoring_client(sync_metadata: SyncMetadata, bizon_config: BizonConfig) -> AbstractMonitor: """Return the monitoring client instance""" - return MonitorFactory.get_monitor(bizon_config) + return MonitorFactory.get_monitor(sync_metadata, bizon_config.monitoring) @staticmethod def get_or_create_job( @@ -252,23 +257,25 @@ def instanciate_and_run_consumer( bizon_config=bizon_config, config=config ).get_source_callback_instance() + sync_metadata = SyncMetadata.from_bizon_config(job_id=job_id, config=bizon_config) + # Get the queue instance queue = AbstractRunner.get_queue(bizon_config=bizon_config, **kwargs) # Get the backend instance backend = AbstractRunner.get_backend(bizon_config=bizon_config, **kwargs) + # Get the monitor instance + monitor = AbstractRunner.get_monitoring_client(sync_metadata=sync_metadata, bizon_config=bizon_config) + # Get the destination instance destination = AbstractRunner.get_destination( - bizon_config=bizon_config, backend=backend, job_id=job_id, source_callback=source_callback + bizon_config=bizon_config, backend=backend, job_id=job_id, source_callback=source_callback, monitor=monitor ) # Get the transform instance transform = AbstractRunner.get_transform(bizon_config=bizon_config) - # Get the monitor instance - monitor = AbstractRunner.get_monitoring_client(bizon_config=bizon_config) - # Create the consumer instance consumer = queue.get_consumer( destination=destination, diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index d480328..7ea2262 100644 --- a/bizon/monitoring/datadog/monitor.py +++ 
b/bizon/monitoring/datadog/monitor.py @@ -4,42 +4,44 @@ from datadog import initialize, statsd from loguru import logger -from bizon.common.models import BizonConfig +from bizon.common.models import SyncMetadata from bizon.engine.pipeline.models import PipelineReturnStatus +from bizon.monitoring.config import MonitoringConfig from bizon.monitoring.monitor import AbstractMonitor from bizon.source.models import SourceRecord class DatadogMonitor(AbstractMonitor): - def __init__(self, pipeline_config: BizonConfig): - super().__init__(pipeline_config) + def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig): + super().__init__(sync_metadata, monitoring_config) # In Kubernetes, set the host dynamically try: - datadog_host_from_env_var = os.getenv(pipeline_config.monitoring.config.datadog_host_env_var) + datadog_host_from_env_var = os.getenv(monitoring_config.config.datadog_host_env_var) if datadog_host_from_env_var: initialize( statsd_host=datadog_host_from_env_var, - statsd_port=pipeline_config.monitoring.config.datadog_agent_port, + statsd_port=monitoring_config.config.datadog_agent_port, ) else: initialize( - statsd_host=pipeline_config.monitoring.config.datadog_agent_host, - statsd_port=pipeline_config.monitoring.config.datadog_agent_port, + statsd_host=monitoring_config.config.datadog_agent_host, + statsd_port=monitoring_config.config.datadog_agent_port, ) except Exception as e: logger.info(f"Failed to initialize Datadog agent: {e}") self.pipeline_monitor_status = "bizon_pipeline.status" self.tags = [ - f"pipeline_name:{self.pipeline_config.name}", - f"pipeline_stream:{self.pipeline_config.source.stream}", - f"pipeline_source:{self.pipeline_config.source.name}", - f"pipeline_destination:{self.pipeline_config.destination.name}", - ] + [f"{key}:{value}" for key, value in self.pipeline_config.monitoring.config.tags.items()] + f"pipeline_name:{self.sync_metadata.name}", + f"pipeline_stream:{self.sync_metadata.stream_name}", + f"pipeline_source:{self.sync_metadata.source_name}", + f"pipeline_destination:{self.sync_metadata.destination_name}", + ] + [f"{key}:{value}" for key, value in self.monitoring_config.config.tags.items()] self.pipeline_active_pipelines = "bizon_pipeline.active_pipelines" self.pipeline_records_synced = "bizon_pipeline.records_synced" + self.pipeline_large_records = "bizon_pipeline.large_records" def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None: """ @@ -79,6 +81,13 @@ def track_records_synced( set_produce_checkpoint(destination_type, destination_id, header.setdefault) return headers + def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None: + statsd.increment( + self.pipeline_large_records, + value=num_records, + tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()], + ) + def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict[str, str]], None]: """ Track the number of records consumed from a Kafka topic. 
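For reference, the Data Streams Monitoring mechanism that patches 05, 06 and 10 wire into DatadogMonitor reduces to two ddtrace checkpoints: a consume checkpoint when records are read from Kafka (track_source_iteration) and a produce checkpoint when they are written to the destination (track_records_synced), with the record headers acting as the context carrier between the two. The sketch below condenses that pattern into one hypothetical helper purely for illustration; only the ddtrace calls mirror the diffs, the function name and combined shape are not part of the patch.

    # Illustrative sketch, assuming ddtrace is installed and DSM is enabled.
    from ddtrace.data_streams import set_consume_checkpoint, set_produce_checkpoint

    def checkpoint_records(records, destination_type: str, destination_id: str) -> list:
        headers_list = []
        for record in records:
            headers = record.data.get("headers", {}) or {}
            # Edge 1: Kafka -> pipeline, reads upstream DSM context from the consumed headers.
            set_consume_checkpoint("kafka", record.data["topic"], headers.get)
            # Edge 2: pipeline -> destination, writes the propagated context back into the carrier.
            set_produce_checkpoint(destination_type, destination_id, headers.setdefault)
            headers_list.append(headers)
        return headers_list

In the actual code the two checkpoints are split across the monitor methods shown in the diffs; they are combined here only to make the carrier hand-off visible in one place.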
diff --git a/bizon/monitoring/monitor.py b/bizon/monitoring/monitor.py index b89076f..93b9973 100644 --- a/bizon/monitoring/monitor.py +++ b/bizon/monitoring/monitor.py @@ -1,16 +1,16 @@ from abc import ABC, abstractmethod -from typing import Dict, List +from typing import Dict, List, Union -from bizon.common.models import BizonConfig +from bizon.common.models import SyncMetadata from bizon.engine.pipeline.models import PipelineReturnStatus -from bizon.monitoring.config import MonitorType +from bizon.monitoring.config import MonitoringConfig, MonitorType from bizon.source.models import SourceRecord class AbstractMonitor(ABC): - def __init__(self, pipeline_config: BizonConfig): - self.pipeline_config = pipeline_config - # Initialize the monitor + def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig): + self.sync_metadata = sync_metadata + self.monitoring_config = monitoring_config @abstractmethod def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None: @@ -36,16 +36,22 @@ def track_records_synced( """ pass + def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None: + """ + Track the number of large records synced in the destination system. This aims at helping to identify the source of the large records. + """ + pass + class MonitorFactory: @staticmethod - def get_monitor(pipeline_config: BizonConfig) -> AbstractMonitor: - if pipeline_config.monitoring is None: + def get_monitor(sync_metadata: SyncMetadata, monitoring_config: Union[MonitoringConfig, None]) -> AbstractMonitor: + if monitoring_config is None: from bizon.monitoring.noop.monitor import NoOpMonitor - return NoOpMonitor(pipeline_config) + return NoOpMonitor(sync_metadata, monitoring_config) - if pipeline_config.monitoring.type == MonitorType.DATADOG: + if monitoring_config.type == MonitorType.DATADOG: from bizon.monitoring.datadog.monitor import DatadogMonitor - return DatadogMonitor(pipeline_config) + return DatadogMonitor(sync_metadata, monitoring_config) diff --git a/bizon/monitoring/noop/monitor.py b/bizon/monitoring/noop/monitor.py index cf77451..4958b56 100644 --- a/bizon/monitoring/noop/monitor.py +++ b/bizon/monitoring/noop/monitor.py @@ -1,11 +1,12 @@ -from bizon.common.models import BizonConfig +from bizon.common.models import SyncMetadata from bizon.engine.pipeline.models import PipelineReturnStatus +from bizon.monitoring.config import MonitoringConfig from bizon.monitoring.monitor import AbstractMonitor class NoOpMonitor(AbstractMonitor): - def __init__(self, pipeline_config: BizonConfig): - super().__init__(pipeline_config) + def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig): + super().__init__(sync_metadata, monitoring_config) def track_pipeline_status(self, pipeline_status: PipelineReturnStatus) -> None: pass diff --git a/tests/connectors/destinations/bigquery/test_bigquery_client.py b/tests/connectors/destinations/bigquery/test_bigquery_client.py index 9a4abb8..3faf952 100644 --- a/tests/connectors/destinations/bigquery/test_bigquery_client.py +++ b/tests/connectors/destinations/bigquery/test_bigquery_client.py @@ -19,6 +19,7 @@ from bizon.destination.config import DestinationTypes from bizon.destination.destination import DestinationFactory from bizon.destination.models import destination_record_schema +from bizon.monitoring.noop.monitor import NoOpMonitor logger = logging.getLogger(__name__) @@ -87,7 +88,10 @@ def 
test_load_records_to_bigquery(my_backend_config, test_table, sync_metadata): ] bq_destination = DestinationFactory().get_destination( - sync_metadata=sync_metadata, config=bigquery_config, backend=my_backend_config + sync_metadata=sync_metadata, + config=bigquery_config, + backend=my_backend_config, + monitor=NoOpMonitor(sync_metadata=sync_metadata, monitoring_config=None), ) assert isinstance(bq_destination, BigQueryDestination) diff --git a/tests/connectors/destinations/bigquery/test_destination_factory.py b/tests/connectors/destinations/bigquery/test_destination_factory.py index 45ff453..2986c37 100644 --- a/tests/connectors/destinations/bigquery/test_destination_factory.py +++ b/tests/connectors/destinations/bigquery/test_destination_factory.py @@ -11,6 +11,8 @@ from bizon.connectors.destinations.bigquery.src.destination import BigQueryDestination from bizon.destination.config import DestinationTypes from bizon.destination.destination import DestinationFactory +from bizon.monitoring.monitor import MonitorFactory +from bizon.monitoring.noop.monitor import NoOpMonitor @pytest.fixture(scope="function") @@ -42,7 +44,12 @@ def test_bigquery_factory(sync_metadata, my_backend): ), ) - destination = DestinationFactory().get_destination(sync_metadata=sync_metadata, config=config, backend=my_backend) + destination = DestinationFactory().get_destination( + sync_metadata=sync_metadata, + config=config, + backend=my_backend, + monitor=NoOpMonitor(sync_metadata=sync_metadata, monitoring_config=None), + ) assert isinstance(destination, BigQueryDestination) assert destination.config.authentication.service_account_key == "" assert destination.config.project_id == "project_id" @@ -66,6 +73,11 @@ def test_bigquery_factory_empty_service_account(sync_metadata, my_backend): ), ) - destination = DestinationFactory().get_destination(sync_metadata=sync_metadata, config=config, backend=my_backend) + destination = DestinationFactory().get_destination( + sync_metadata=sync_metadata, + config=config, + backend=my_backend, + monitor=NoOpMonitor(sync_metadata=sync_metadata, monitoring_config=None), + ) assert isinstance(destination, BigQueryDestination) assert destination.config.authentication is None diff --git a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py index a210447..4161270 100644 --- a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py +++ b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py @@ -17,6 +17,7 @@ from bizon.destination.config import DestinationTypes from bizon.destination.destination import DestinationFactory from bizon.destination.models import destination_record_schema +from bizon.monitoring.noop.monitor import NoOpMonitor load_dotenv() @@ -74,6 +75,7 @@ def test_streaming_records_to_bigquery(my_backend_config, sync_metadata_stream): config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests @@ -109,6 +111,7 @@ def test_override_destination_id_streaming_records_to_bigquery(my_backend_config config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests @@ -143,6 +146,7 @@ def 
test_streaming_large_records_to_bigquery(my_backend_config, sync_metadata_st config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests @@ -204,7 +208,10 @@ def test_streaming_unnested_records(my_backend_config, sync_metadata_stream): ) bq_destination = DestinationFactory().get_destination( - sync_metadata=sync_metadata_stream, config=bigquery_config, backend=my_backend_config + sync_metadata=sync_metadata_stream, + config=bigquery_config, + backend=my_backend_config, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests @@ -271,7 +278,10 @@ def test_error_on_added_column(my_backend_config, sync_metadata_stream): ) bq_destination = DestinationFactory().get_destination( - sync_metadata=sync_metadata_stream, config=bigquery_config, backend=my_backend_config + sync_metadata=sync_metadata_stream, + config=bigquery_config, + backend=my_backend_config, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Insert proper records @@ -362,6 +372,7 @@ def test_enforce_record_schema_columns(my_backend_config, sync_metadata_stream): config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Insert proper records @@ -428,6 +439,7 @@ def test_enforce_record_schema_columns(my_backend_config, sync_metadata_stream): config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) new_column_in_record = {"id": 3, "name": "Charlie", "last_name": "Chaplin", "created_at": "2021-01-01 00:00:00"} @@ -483,7 +495,10 @@ def test_error_on_deleted_column(my_backend_config, sync_metadata_stream): ) bq_destination = DestinationFactory().get_destination( - sync_metadata=sync_metadata_stream, config=bigquery_config, backend=my_backend_config + sync_metadata=sync_metadata_stream, + config=bigquery_config, + backend=my_backend_config, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Insert proper records @@ -564,6 +579,7 @@ def test_streaming_unnested_records_legacy(my_backend_config, sync_metadata_stre config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests @@ -639,6 +655,7 @@ def test_streaming_unnested_records_legacy_clustering_keys(my_backend_config, sy config=bigquery_config, backend=my_backend_config, source_callback=None, + monitor=NoOpMonitor(sync_metadata=sync_metadata_stream, monitoring_config=None), ) # Import here to not throw auth errors when running tests diff --git a/tests/destination/test_destination_logic.py b/tests/destination/test_destination_logic.py index d3f8d26..efcf9f1 100644 --- a/tests/destination/test_destination_logic.py +++ b/tests/destination/test_destination_logic.py @@ -10,6 +10,7 @@ from bizon.destination.models import destination_record_schema from bizon.engine.backend.adapters.sqlalchemy.backend import SQLAlchemyBackend from bizon.engine.backend.models import JobStatus, StreamJob +from bizon.monitoring.noop.monitor import NoOpMonitor from bizon.source.callback import NoOpSourceCallback @@ -27,18 +28,21 @@ def 
logger_destination(my_sqlite_backend: SQLAlchemyBackend, sqlite_db_session): session=sqlite_db_session, ) + sync_metadata = SyncMetadata( + job_id=job.id, + name="job_test", + source_name="dummy", + stream_name="test", + destination_name="logger", + sync_mode="full_refresh", + ) + return LoggerDestination( - sync_metadata=SyncMetadata( - job_id=job.id, - name="job_test", - source_name="dummy", - stream_name="test", - destination_name="logger", - sync_mode="full_refresh", - ), + sync_metadata=sync_metadata, config=LoggerDestinationConfig(dummy="bizon"), backend=my_sqlite_backend, source_callback=NoOpSourceCallback(config={}), + monitor=NoOpMonitor(sync_metadata=sync_metadata, monitoring_config=None), ) diff --git a/tests/engine/test_parse_yaml.py b/tests/engine/test_parse_yaml.py index 665d2cd..571a69e 100644 --- a/tests/engine/test_parse_yaml.py +++ b/tests/engine/test_parse_yaml.py @@ -3,11 +3,13 @@ from yaml import safe_load from bizon.cli.utils import parse_from_yaml +from bizon.common.models import SyncMetadata from bizon.connectors.destinations.logger.src.destination import LoggerDestination from bizon.engine.backend.config import BackendTypes from bizon.engine.engine import RunnerFactory from bizon.engine.queue.adapters.kafka.queue import KafkaQueue from bizon.engine.queue.adapters.rabbitmq.queue import RabbitMQ +from bizon.monitoring.noop.monitor import NoOpMonitor from bizon.source.callback import NoOpSourceCallback @@ -49,8 +51,16 @@ def test_parse_task_runner_python_queue(): runner = RunnerFactory.create_from_config_dict(config=config) backend = runner.get_backend(bizon_config=runner.bizon_config) + destination = runner.get_destination( - bizon_config=runner.bizon_config, backend=backend, job_id="123", source_callback=NoOpSourceCallback(config={}) + bizon_config=runner.bizon_config, + backend=backend, + job_id="123", + source_callback=NoOpSourceCallback(config={}), + monitor=NoOpMonitor( + sync_metadata=SyncMetadata.from_bizon_config(job_id="123", config=runner.bizon_config), + monitoring_config=None, + ), ) assert isinstance(destination, LoggerDestination) @@ -101,9 +111,18 @@ def test_parse_task_runner_kafka_queue(): runner = RunnerFactory.create_from_config_dict(config=config) backend = runner.get_backend(bizon_config=runner.bizon_config) + destination = runner.get_destination( - bizon_config=runner.bizon_config, backend=backend, job_id="123", source_callback=NoOpSourceCallback(config={}) + bizon_config=runner.bizon_config, + backend=backend, + job_id="123", + source_callback=NoOpSourceCallback(config={}), + monitor=NoOpMonitor( + sync_metadata=SyncMetadata.from_bizon_config(job_id="123", config=runner.bizon_config), + monitoring_config=None, + ), ) + queue = runner.get_queue(bizon_config=runner.bizon_config) assert isinstance(destination, LoggerDestination) @@ -158,9 +177,18 @@ def test_parse_task_runner_rabbitmq_queue(): assert runner.bizon_config.name == "test_job" backend = runner.get_backend(bizon_config=runner.bizon_config) + destination = runner.get_destination( - bizon_config=runner.bizon_config, backend=backend, job_id="123", source_callback=NoOpSourceCallback(config={}) + bizon_config=runner.bizon_config, + backend=backend, + job_id="123", + source_callback=NoOpSourceCallback(config={}), + monitor=NoOpMonitor( + sync_metadata=SyncMetadata.from_bizon_config(job_id="123", config=runner.bizon_config), + monitoring_config=None, + ), ) + queue = runner.get_queue(bizon_config=runner.bizon_config) assert isinstance(destination, LoggerDestination) diff --git 
a/tests/engine/test_producer_recovery.py b/tests/engine/test_producer_recovery.py index 18e4382..53cdd09 100644 --- a/tests/engine/test_producer_recovery.py +++ b/tests/engine/test_producer_recovery.py @@ -18,6 +18,7 @@ from bizon.engine.engine import RunnerFactory from bizon.engine.runner.adapters.thread import ThreadRunner from bizon.engine.runner.runner import AbstractRunner +from bizon.monitoring.noop.monitor import NoOpMonitor from bizon.source.callback import NoOpSourceCallback temporary_file = NamedTemporaryFile() @@ -66,15 +67,17 @@ def file_destination(my_sqlite_backend: SQLAlchemyBackend, sqlite_db_session): session=sqlite_db_session, ) + sync_metadata = SyncMetadata( + job_id=job.id, + name="job_test", + source_name="dummy", + stream_name="test", + destination_name="logger", + sync_mode="full_refresh", + ) + return FileDestination( - sync_metadata=SyncMetadata( - job_id=job.id, - name="job_test", - source_name="dummy", - stream_name="test", - destination_name="logger", - sync_mode="full_refresh", - ), + sync_metadata=sync_metadata, config=FileDestinationDetailsConfig( format=FileFormat.JSON, destination_id=temporary_file.name, @@ -83,6 +86,7 @@ def file_destination(my_sqlite_backend: SQLAlchemyBackend, sqlite_db_session): ), backend=my_sqlite_backend, source_callback=NoOpSourceCallback(config={}), + monitor=NoOpMonitor(sync_metadata=sync_metadata, monitoring_config=None), ) diff --git a/tests/monitoring/test_monitoring_datadog.py b/tests/monitoring/test_monitoring_datadog.py new file mode 100644 index 0000000..cf767c5 --- /dev/null +++ b/tests/monitoring/test_monitoring_datadog.py @@ -0,0 +1,33 @@ +from bizon.common.models import SyncMetadata +from bizon.monitoring.config import DatadogConfig, MonitoringConfig, MonitorType +from bizon.monitoring.datadog.monitor import DatadogMonitor +from bizon.monitoring.monitor import MonitorFactory +from bizon.monitoring.noop.monitor import NoOpMonitor + +sync_metadata = SyncMetadata( + job_id="123", + name="pipeline_test", + source_name="source_test", + stream_name="stream_test", + destination_name="destination_test", + sync_mode="full_refresh", +) + + +def test_datadog_monitor(): + dd_monitor = MonitorFactory.get_monitor( + sync_metadata=sync_metadata, + monitoring_config=MonitoringConfig( + type=MonitorType.DATADOG, config=DatadogConfig(datadog_agent_host="localhost", datadog_agent_port=8125) + ), + ) + + assert type(dd_monitor) == DatadogMonitor + assert dd_monitor.monitoring_config.type == MonitorType.DATADOG + + +def test_no_op_monitor(): + no_op_monitor = MonitorFactory.get_monitor(sync_metadata=sync_metadata, monitoring_config=None) + + assert type(no_op_monitor) == NoOpMonitor + assert no_op_monitor.monitoring_config is None From f5218ad79c82ed41be44364d064154aa963153f8 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Wed, 9 Jul 2025 14:43:23 +0200 Subject: [PATCH 11/22] chore: add better datadog span to trace stream iteration (#73) * chore: change values and handle large rows in streaming v2 * chore: change values and handle large rows in streaming v2 * chore: add monitoring trace * chore: remove service override * chore: add kafka headers * chore: add current span log * chore: wrap monitor to track iteration * chore: upgrade span priority * chore: remove priority * chore: set priority while removing kafka sampling prio * chore: auto keep sampling prio * chore: remove initial dd pathway * chore: add enable_tracing config --- bizon/engine/runner/adapters/streaming.py | 95 ++++++++++++----------- bizon/monitoring/config.py | 8 +- 
bizon/monitoring/datadog/monitor.py | 52 ++++++++++++- bizon/monitoring/monitor.py | 16 +++- bizon/monitoring/noop/monitor.py | 21 ++++- 5 files changed, 140 insertions(+), 52 deletions(-) diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index f154480..d4494f8 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -63,55 +63,56 @@ def run(self) -> RunnerStatus: logger.info(f"Max iterations {source.config.max_iterations} reached, terminating stream ...") break - source_iteration = source.get() + with monitor.trace(operation_name="bizon.stream.iteration"): + source_iteration = source.get() + + destination_id_indexed_records = {} + + if len(source_iteration.records) == 0: + logger.info("No new records found, stopping iteration") + time.sleep(2) + monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS) + iteration += 1 + continue + + for record in source_iteration.records: + if destination_id_indexed_records.get(record.destination_id): + destination_id_indexed_records[record.destination_id].append(record) + else: + destination_id_indexed_records[record.destination_id] = [record] + + for destination_id, records in destination_id_indexed_records.items(): + df_source_records = StreamingRunner.convert_source_records(records) + + dsm_headers = monitor.track_source_iteration(records=records) + logger.info(f"DSM headers: {dsm_headers[0] if dsm_headers else None}") + + # Apply transformation + df_source_records = transform.apply_transforms(df_source_records=df_source_records) + + df_destination_records = StreamingRunner.convert_to_destination_records( + df_source_records, datetime.now(tz=UTC) + ) + # Override destination_id + destination.destination_id = destination_id + destination.write_or_buffer_records( + df_destination_records=df_destination_records, + iteration=iteration, + pagination=None, + ) + last_dsm_headers = monitor.track_records_synced( + num_records=len(df_destination_records), + destination_id=destination_id, + extra_tags={"destination_id": destination_id}, + headers=dsm_headers, + ) + logger.info(f"Last DSM headers: {last_dsm_headers[0] if last_dsm_headers else None}") + + if os.getenv("ENVIRONMENT") == "production": + source.commit() - destination_id_indexed_records = {} + iteration += 1 - if len(source_iteration.records) == 0: - logger.info("No new records found, stopping iteration") - time.sleep(2) monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS) - iteration += 1 - continue - - for record in source_iteration.records: - if destination_id_indexed_records.get(record.destination_id): - destination_id_indexed_records[record.destination_id].append(record) - else: - destination_id_indexed_records[record.destination_id] = [record] - - for destination_id, records in destination_id_indexed_records.items(): - df_source_records = StreamingRunner.convert_source_records(records) - - dsm_headers = monitor.track_source_iteration(records=records) - logger.info(f"DSM headers: {dsm_headers[0] if dsm_headers else None}") - - # Apply transformation - df_source_records = transform.apply_transforms(df_source_records=df_source_records) - - df_destination_records = StreamingRunner.convert_to_destination_records( - df_source_records, datetime.now(tz=UTC) - ) - # Override destination_id - destination.destination_id = destination_id - destination.write_or_buffer_records( - df_destination_records=df_destination_records, - iteration=iteration, - pagination=None, - ) - last_dsm_headers = 
monitor.track_records_synced( - num_records=len(df_destination_records), - destination_id=destination_id, - extra_tags={"destination_id": destination_id}, - headers=dsm_headers, - ) - logger.info(f"Last DSM headers: {last_dsm_headers[0] if last_dsm_headers else None}") - - if os.getenv("ENVIRONMENT") == "production": - source.commit() - - iteration += 1 - - monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS) return RunnerStatus(stream=PipelineReturnStatus.SUCCESS) # return when max iterations is reached diff --git a/bizon/monitoring/config.py b/bizon/monitoring/config.py index 7b5b594..d877f5c 100644 --- a/bizon/monitoring/config.py +++ b/bizon/monitoring/config.py @@ -8,7 +8,11 @@ class MonitorType(str, Enum): DATADOG = "datadog" -class DatadogConfig(BaseModel): +class BaseMonitoringConfig(BaseModel): + enable_tracing: bool = Field(default=False, description="Enable tracing for the monitor") + + +class DatadogConfig(BaseMonitoringConfig): datadog_agent_host: Optional[str] = None datadog_host_env_var: Optional[str] = None datadog_agent_port: int = 8125 @@ -24,6 +28,6 @@ def __init__(self, **data): raise ValueError("Either datadog_agent_host or datadog_host_env_var must be specified") -class MonitoringConfig(BaseModel): +class MonitoringConfig(BaseMonitoringConfig): type: MonitorType config: Optional[DatadogConfig] = None diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 7ea2262..94a9ec9 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -1,4 +1,5 @@ import os +from contextlib import contextmanager from typing import Dict, List, Union from datadog import initialize, statsd @@ -73,11 +74,18 @@ def track_records_synced( tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()], ) if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": + from ddtrace import tracer from ddtrace.data_streams import set_produce_checkpoint + logger.info(f"Current span in track_consume: {tracer.current_span()}") + destination_type = self.pipeline_config.destination.alias for header in headers: + if "x-datadog-sampling-priority" in header: + del header["x-datadog-sampling-priority"] + if "dd-pathway-ctx-base64" in header: + del header["dd-pathway-ctx-base64"] set_produce_checkpoint(destination_type, destination_id, header.setdefault) return headers @@ -97,11 +105,53 @@ def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict """ if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": + from ddtrace import tracer from ddtrace.data_streams import set_consume_checkpoint + logger.info(f"Current span in track_consume: {tracer.current_span()}") headers_list = [] for record in records: - headers = {} + headers = record.data.get("headers", {}) set_consume_checkpoint("kafka", record.data["topic"], headers.get) headers_list.append(headers) return headers_list + + @contextmanager + def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None): + """ + Create a trace span for monitoring using Datadog APM. 
+ + Args: + operation_name (str): The name of the operation being traced + resource (str): The resource being operated on (e.g., topic name, table name) + extra_tags (Dict[str, str]): Additional tags for the trace + + Yields: + A span object that can be used to add additional metadata + """ + if not self.monitoring_config.config.enable_tracing: + yield None + + try: + from ddtrace import tracer + + # Combine tags + all_tags = self.tags.copy() + if extra_tags: + all_tags.extend([f"{key}:{value}" for key, value in extra_tags.items()]) + + # Create the span + with tracer.trace(operation_name, resource=resource) as span: + # Add tags to the span + for tag in all_tags: + if ":" in tag: + key, value = tag.split(":", 1) + span.set_tag(key, value) + span.set_tag("_sampling_priority_v1", 1) + yield span + except ImportError: + logger.warning("ddtrace not available, skipping tracing") + yield None + except Exception as e: + logger.warning(f"Failed to create trace: {e}") + yield None diff --git a/bizon/monitoring/monitor.py b/bizon/monitoring/monitor.py index 93b9973..e4f03ba 100644 --- a/bizon/monitoring/monitor.py +++ b/bizon/monitoring/monitor.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Dict, List, Union +from typing import Callable, Dict, List, Union from bizon.common.models import SyncMetadata from bizon.engine.pipeline.models import PipelineReturnStatus @@ -36,6 +36,20 @@ def track_records_synced( """ pass + def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None): + """ + Create a trace span for monitoring. + + Args: + operation_name (str): The name of the operation being traced + resource (str): The resource being operated on (e.g., topic name, table name) + extra_tags (Dict[str, str]): Additional tags for the trace + + Returns: + A context manager that can be used with 'with' statement + """ + pass + def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None: """ Track the number of large records synced in the destination system. This aims at helping to identify the source of the large records. diff --git a/bizon/monitoring/noop/monitor.py b/bizon/monitoring/noop/monitor.py index 4958b56..dc0f6fb 100644 --- a/bizon/monitoring/noop/monitor.py +++ b/bizon/monitoring/noop/monitor.py @@ -1,4 +1,8 @@ -from bizon.common.models import SyncMetadata +from contextlib import contextmanager +from typing import Dict + +from bizon.common.models import BizonConfig, SyncMetadata + from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.monitoring.config import MonitoringConfig from bizon.monitoring.monitor import AbstractMonitor @@ -10,3 +14,18 @@ def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringCon def track_pipeline_status(self, pipeline_status: PipelineReturnStatus) -> None: pass + + @contextmanager + def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None): + """ + No-op trace implementation. 
+ + Args: + operation_name (str): The name of the operation being traced + resource (str): The resource being operated on (e.g., topic name, table name) + extra_tags (Dict[str, str]): Additional tags for the trace + + Yields: + None (no-op implementation) + """ + yield None From 7ce4f7178d879199ff58ff7b9b30d30ac69df523 Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Wed, 9 Jul 2025 15:30:53 +0200 Subject: [PATCH 12/22] feat: add destination_alias to sync_metadata (#74) * feat: add destination_alias to sync_metadata * add unit testing --- bizon/common/models.py | 2 ++ bizon/monitoring/datadog/monitor.py | 2 +- bizon/monitoring/monitor.py | 2 +- bizon/monitoring/noop/monitor.py | 1 - .../bigquery/test_bigquery_client.py | 1 + .../bigquery/test_destination_factory.py | 1 + .../test_bigquery_streaming_client.py | 1 + tests/destination/test_destination_logic.py | 1 + tests/engine/test_producer_recovery.py | 1 + tests/monitoring/test_monitoring_datadog.py | 32 +++++++++++++++++++ 10 files changed, 41 insertions(+), 3 deletions(-) diff --git a/bizon/common/models.py b/bizon/common/models.py index a4ff8c4..4194327 100644 --- a/bizon/common/models.py +++ b/bizon/common/models.py @@ -75,6 +75,7 @@ class SyncMetadata(BaseModel): stream_name: str sync_mode: SourceSyncModes destination_name: str + destination_alias: str @classmethod def from_bizon_config(cls, job_id: str, config: BizonConfig) -> "SyncMetadata": @@ -85,4 +86,5 @@ def from_bizon_config(cls, job_id: str, config: BizonConfig) -> "SyncMetadata": stream_name=config.source.stream, sync_mode=config.source.sync_mode, destination_name=config.destination.name, + destination_alias=config.destination.alias, ) diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 94a9ec9..7f93858 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -79,7 +79,7 @@ def track_records_synced( logger.info(f"Current span in track_consume: {tracer.current_span()}") - destination_type = self.pipeline_config.destination.alias + destination_type = self.sync_metadata.destination_alias for header in headers: if "x-datadog-sampling-priority" in header: diff --git a/bizon/monitoring/monitor.py b/bizon/monitoring/monitor.py index e4f03ba..e3c82f5 100644 --- a/bizon/monitoring/monitor.py +++ b/bizon/monitoring/monitor.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Callable, Dict, List, Union +from typing import Dict, List, Union from bizon.common.models import SyncMetadata from bizon.engine.pipeline.models import PipelineReturnStatus diff --git a/bizon/monitoring/noop/monitor.py b/bizon/monitoring/noop/monitor.py index dc0f6fb..1cba085 100644 --- a/bizon/monitoring/noop/monitor.py +++ b/bizon/monitoring/noop/monitor.py @@ -2,7 +2,6 @@ from typing import Dict from bizon.common.models import BizonConfig, SyncMetadata - from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.monitoring.config import MonitoringConfig from bizon.monitoring.monitor import AbstractMonitor diff --git a/tests/connectors/destinations/bigquery/test_bigquery_client.py b/tests/connectors/destinations/bigquery/test_bigquery_client.py index 3faf952..58fbdf1 100644 --- a/tests/connectors/destinations/bigquery/test_bigquery_client.py +++ b/tests/connectors/destinations/bigquery/test_bigquery_client.py @@ -52,6 +52,7 @@ def sync_metadata() -> SyncMetadata: source_name="cookie", stream_name="test", destination_name="bigquery", + destination_alias="bigquery", sync_mode="full_refresh", ) 
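A minimal sketch of how the destination_alias introduced in this patch travels on SyncMetadata and ends up as the Data Streams produce-checkpoint destination type in the Datadog monitor. The concrete values below are hypothetical; only the field names and the model import come from the code in this series.

from bizon.common.models import SyncMetadata

# Hypothetical pipeline values; field names match the updated SyncMetadata model
sync_metadata = SyncMetadata(
    job_id="123",
    name="orders_pipeline",
    source_name="kafka",
    stream_name="orders",
    destination_name="bigquery_streaming_v2",
    destination_alias="analytics_orders",  # new field added by this patch
    sync_mode="stream",
)

# The Datadog monitor now tags produce checkpoints with this alias rather than
# reaching into the pipeline config for destination.alias:
destination_type = sync_metadata.destination_alias
assert destination_type == "analytics_orders"
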
diff --git a/tests/connectors/destinations/bigquery/test_destination_factory.py b/tests/connectors/destinations/bigquery/test_destination_factory.py index 2986c37..7f7b65d 100644 --- a/tests/connectors/destinations/bigquery/test_destination_factory.py +++ b/tests/connectors/destinations/bigquery/test_destination_factory.py @@ -22,6 +22,7 @@ def sync_metadata() -> SyncMetadata: source_name="cookie", stream_name="test", destination_name="bigquery", + destination_alias="bigquery", sync_mode="full_refresh", ) diff --git a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py index 4161270..eae2e0e 100644 --- a/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py +++ b/tests/connectors/destinations/bigquery_streaming/test_bigquery_streaming_client.py @@ -52,6 +52,7 @@ def sync_metadata_stream() -> SyncMetadata: source_name="cookie_test", stream_name="test_stream_2", destination_name="bigquery", + destination_alias="bigquery", sync_mode="stream", ) diff --git a/tests/destination/test_destination_logic.py b/tests/destination/test_destination_logic.py index efcf9f1..3779eae 100644 --- a/tests/destination/test_destination_logic.py +++ b/tests/destination/test_destination_logic.py @@ -34,6 +34,7 @@ def logger_destination(my_sqlite_backend: SQLAlchemyBackend, sqlite_db_session): source_name="dummy", stream_name="test", destination_name="logger", + destination_alias="logger", sync_mode="full_refresh", ) diff --git a/tests/engine/test_producer_recovery.py b/tests/engine/test_producer_recovery.py index 53cdd09..7b4c7b7 100644 --- a/tests/engine/test_producer_recovery.py +++ b/tests/engine/test_producer_recovery.py @@ -73,6 +73,7 @@ def file_destination(my_sqlite_backend: SQLAlchemyBackend, sqlite_db_session): source_name="dummy", stream_name="test", destination_name="logger", + destination_alias="logger", sync_mode="full_refresh", ) diff --git a/tests/monitoring/test_monitoring_datadog.py b/tests/monitoring/test_monitoring_datadog.py index cf767c5..309e2e6 100644 --- a/tests/monitoring/test_monitoring_datadog.py +++ b/tests/monitoring/test_monitoring_datadog.py @@ -1,4 +1,5 @@ from bizon.common.models import SyncMetadata +from bizon.engine.pipeline.models import PipelineReturnStatus from bizon.monitoring.config import DatadogConfig, MonitoringConfig, MonitorType from bizon.monitoring.datadog.monitor import DatadogMonitor from bizon.monitoring.monitor import MonitorFactory @@ -10,6 +11,7 @@ source_name="source_test", stream_name="stream_test", destination_name="destination_test", + destination_alias="destination_test", sync_mode="full_refresh", ) @@ -26,6 +28,36 @@ def test_datadog_monitor(): assert dd_monitor.monitoring_config.type == MonitorType.DATADOG +def test_datadog_track_records_synced(): + dd_monitor = MonitorFactory.get_monitor( + sync_metadata=sync_metadata, + monitoring_config=MonitoringConfig( + type=MonitorType.DATADOG, config=DatadogConfig(datadog_agent_host="localhost", datadog_agent_port=8125) + ), + ) + dd_monitor.track_records_synced(num_records=10, destination_id="123") + + +def test_datadog_track_pipeline_status(): + dd_monitor = MonitorFactory.get_monitor( + sync_metadata=sync_metadata, + monitoring_config=MonitoringConfig( + type=MonitorType.DATADOG, config=DatadogConfig(datadog_agent_host="localhost", datadog_agent_port=8125) + ), + ) + dd_monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS) + + +def 
test_datadog_track_large_records_synced(): + dd_monitor = MonitorFactory.get_monitor( + sync_metadata=sync_metadata, + monitoring_config=MonitoringConfig( + type=MonitorType.DATADOG, config=DatadogConfig(datadog_agent_host="localhost", datadog_agent_port=8125) + ), + ) + dd_monitor.track_large_records_synced(num_records=10) + + def test_no_op_monitor(): no_op_monitor = MonitorFactory.get_monitor(sync_metadata=sync_metadata, monitoring_config=None) From f1d7c444a3694699e255926539596b40b6e3e5a0 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Wed, 9 Jul 2025 16:05:15 +0200 Subject: [PATCH 13/22] Fix generator did not stop error when enable_tracing is false (#75) * chore: update config, add return when tracing disabled * chore: fix commented commit * chore: add forbidden extra fields --- bizon/monitoring/config.py | 6 ++++++ bizon/monitoring/datadog/monitor.py | 1 + 2 files changed, 7 insertions(+) diff --git a/bizon/monitoring/config.py b/bizon/monitoring/config.py index d877f5c..c41190d 100644 --- a/bizon/monitoring/config.py +++ b/bizon/monitoring/config.py @@ -27,7 +27,13 @@ def __init__(self, **data): if not self.host_is_configured: raise ValueError("Either datadog_agent_host or datadog_host_env_var must be specified") + class Config: + extra = "forbid" + class MonitoringConfig(BaseMonitoringConfig): type: MonitorType config: Optional[DatadogConfig] = None + + class Config: + extra = "forbid" diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 7f93858..2bfaf2e 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -131,6 +131,7 @@ def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, """ if not self.monitoring_config.config.enable_tracing: yield None + return try: from ddtrace import tracer From 9eb0e67b233a9c28117b5368515c83d9a14ff6a8 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Wed, 9 Jul 2025 16:22:54 +0200 Subject: [PATCH 14/22] chore: fix monitor instantiation in destination and removed redundant logs (#76) * chore: removed logs * chore: instantiate monitor --- bizon/destination/destination.py | 1 + bizon/engine/runner/adapters/streaming.py | 4 +--- bizon/monitoring/datadog/monitor.py | 5 ----- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/bizon/destination/destination.py b/bizon/destination/destination.py index a9a9c5a..0097bc7 100644 --- a/bizon/destination/destination.py +++ b/bizon/destination/destination.py @@ -56,6 +56,7 @@ def __init__( self.sync_metadata = sync_metadata self.config = config self.backend = backend + self.monitor = monitor self.buffer = DestinationBuffer( buffer_size=self.config.buffer_size, buffer_flush_timeout=self.config.buffer_flush_timeout ) diff --git a/bizon/engine/runner/adapters/streaming.py b/bizon/engine/runner/adapters/streaming.py index d4494f8..d300e70 100644 --- a/bizon/engine/runner/adapters/streaming.py +++ b/bizon/engine/runner/adapters/streaming.py @@ -85,7 +85,6 @@ def run(self) -> RunnerStatus: df_source_records = StreamingRunner.convert_source_records(records) dsm_headers = monitor.track_source_iteration(records=records) - logger.info(f"DSM headers: {dsm_headers[0] if dsm_headers else None}") # Apply transformation df_source_records = transform.apply_transforms(df_source_records=df_source_records) @@ -100,13 +99,12 @@ def run(self) -> RunnerStatus: iteration=iteration, pagination=None, ) - last_dsm_headers = monitor.track_records_synced( + monitor.track_records_synced( 
num_records=len(df_destination_records), destination_id=destination_id, extra_tags={"destination_id": destination_id}, headers=dsm_headers, ) - logger.info(f"Last DSM headers: {last_dsm_headers[0] if last_dsm_headers else None}") if os.getenv("ENVIRONMENT") == "production": source.commit() diff --git a/bizon/monitoring/datadog/monitor.py b/bizon/monitoring/datadog/monitor.py index 2bfaf2e..4e2715e 100644 --- a/bizon/monitoring/datadog/monitor.py +++ b/bizon/monitoring/datadog/monitor.py @@ -74,11 +74,8 @@ def track_records_synced( tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()], ) if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": - from ddtrace import tracer from ddtrace.data_streams import set_produce_checkpoint - logger.info(f"Current span in track_consume: {tracer.current_span()}") - destination_type = self.sync_metadata.destination_alias for header in headers: @@ -105,10 +102,8 @@ def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict """ if os.getenv("DD_DATA_STREAMS_ENABLED") == "true": - from ddtrace import tracer from ddtrace.data_streams import set_consume_checkpoint - logger.info(f"Current span in track_consume: {tracer.current_span()}") headers_list = [] for record in records: headers = record.data.get("headers", {}) From 9b64afc6fb8055d55a12f8e358136bd7e20e84d7 Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Thu, 10 Jul 2025 12:08:27 +0200 Subject: [PATCH 15/22] feat(bigquery): allow set bq_max_rows_per_request (#77) --- .../destinations/bigquery_streaming/src/config.py | 6 +++++- .../destinations/bigquery_streaming/src/destination.py | 3 +-- .../destinations/bigquery_streaming_v2/src/config.py | 6 +++++- .../destinations/bigquery_streaming_v2/src/destination.py | 3 +-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming/src/config.py b/bizon/connectors/destinations/bigquery_streaming/src/config.py index dcec92e..2655979 100644 --- a/bizon/connectors/destinations/bigquery_streaming/src/config.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/config.py @@ -41,7 +41,11 @@ class BigQueryStreamingConfigDetails(AbstractDestinationDetailsConfig): description="BigQuery Time partitioning type", ) authentication: Optional[BigQueryAuthentication] = None - bq_max_rows_per_request: Optional[int] = Field(30000, description="Max rows per buffer streaming request.") + bq_max_rows_per_request: Optional[int] = Field( + 5000, + description="Max rows per buffer streaming request. Must not exceed 10000.", + le=10000, + ) record_schemas: Optional[list[BigQueryRecordSchemaConfig]] = Field( default=None, description="Schema for the records. Required if unnest is set to true." 
) diff --git a/bizon/connectors/destinations/bigquery_streaming/src/destination.py b/bizon/connectors/destinations/bigquery_streaming/src/destination.py index 1949944..959e8a6 100644 --- a/bizon/connectors/destinations/bigquery_streaming/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming/src/destination.py @@ -45,7 +45,6 @@ class BigQueryStreamingDestination(AbstractDestination): # Add constants for limits - MAX_ROWS_PER_REQUEST = 5000 # 5000 (max is 10000) MAX_REQUEST_SIZE_BYTES = 5 * 1024 * 1024 # 5 MB (max is 10MB) MAX_ROW_SIZE_BYTES = 0.9 * 1024 * 1024 # 1 MB @@ -353,7 +352,7 @@ def batch(self, iterable): # If adding this item would exceed either limit, yield current batch and start new one if ( - len(current_batch) >= self.MAX_ROWS_PER_REQUEST + len(current_batch) >= self.bq_max_rows_per_request or current_batch_size + item_size > self.MAX_REQUEST_SIZE_BYTES ): logger.debug(f"Yielding batch of {len(current_batch)} rows, size: {current_batch_size/1024/1024:.2f}MB") diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py index 18e3e1c..a0af3cc 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/config.py @@ -41,7 +41,11 @@ class BigQueryStreamingV2ConfigDetails(AbstractDestinationDetailsConfig): description="BigQuery Time partitioning type", ) authentication: Optional[BigQueryAuthentication] = None - bq_max_rows_per_request: Optional[int] = Field(30000, description="Max rows per buffer streaming request.") + bq_max_rows_per_request: Optional[int] = Field( + 5000, + description="Max rows per buffer streaming request. Must not exceed 10000.", + le=10000, + ) record_schemas: Optional[list[BigQueryRecordSchemaConfig]] = Field( default=None, description="Schema for the records. Required if unnest is set to true." 
) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index c7b11f1..f184446 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -45,7 +45,6 @@ class BigQueryStreamingV2Destination(AbstractDestination): # Add constants for limits - MAX_ROWS_PER_REQUEST = 6000 # 8000 (max is 10000) MAX_REQUEST_SIZE_BYTES = 8 * 1024 * 1024 # 8 MB (max is 10MB) MAX_ROW_SIZE_BYTES = 3 * 1024 * 1024 # 3 MB (max is 10MB) @@ -404,7 +403,7 @@ def batch(self, iterable): # If adding this item would exceed either limit, yield current batch and start new one if ( - len(current_batch) >= self.MAX_ROWS_PER_REQUEST + len(current_batch) >= self.bq_max_rows_per_request or current_batch_size + item_size > self.MAX_REQUEST_SIZE_BYTES ): logger.debug(f"Yielding batch of {len(current_batch)} rows, size: {current_batch_size/1024/1024:.2f}MB") From 8f6453d40248ca62a5c2bb90773b11846dddc422 Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Thu, 10 Jul 2025 12:26:42 +0200 Subject: [PATCH 16/22] feat(bigquery): allow set concurrent thread (#78) --- .../destinations/bigquery_streaming_v2/src/destination.py | 2 +- bizon/destination/config.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index f184446..5d632a4 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -352,7 +352,7 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - batches = list(self.batch(serialized_rows)) # Use ThreadPoolExecutor for parallel processing - max_workers = min(len(batches), 10) # Limit to 10 concurrent threads + max_workers = min(len(batches), self.config.max_concurrent_threads) logger.info(f"Processing {len(batches)} batches with {max_workers} concurrent threads") try: diff --git a/bizon/destination/config.py b/bizon/destination/config.py index 7fff5ac..c64255c 100644 --- a/bizon/destination/config.py +++ b/bizon/destination/config.py @@ -42,6 +42,11 @@ class AbstractDestinationDetailsConfig(BaseModel): description="Maximum time in seconds for buffering after which the records will be written to the destination. Set to 0 to deactivate the timeout buffer check.", # noqa ) + max_concurrent_threads: int = Field( + default=10, + description="Maximum number of concurrent threads to use for writing to the destination.", + ) + record_schemas: Optional[list[RecordSchemaConfig]] = Field( default=None, description="Schemas for the records. Required if unnest is set to true." 
) From 4120b76930004bef7264cbf1f6f125737b359519 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Thu, 10 Jul 2025 16:21:04 +0200 Subject: [PATCH 17/22] chore: increase max row size and max request size (#79) --- .../destinations/bigquery_streaming_v2/src/destination.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index 5d632a4..ab706f5 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -45,8 +45,8 @@ class BigQueryStreamingV2Destination(AbstractDestination): # Add constants for limits - MAX_REQUEST_SIZE_BYTES = 8 * 1024 * 1024 # 8 MB (max is 10MB) - MAX_ROW_SIZE_BYTES = 3 * 1024 * 1024 # 3 MB (max is 10MB) + MAX_REQUEST_SIZE_BYTES = 9.5 * 1024 * 1024 # 9.5 MB (max is 10MB) + MAX_ROW_SIZE_BYTES = 8 * 1024 * 1024 # 8 MB (max is 10MB) def __init__( self, From 1203f0249e5123a7aaea92a791b285d5c807526c Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Fri, 11 Jul 2025 11:55:22 +0200 Subject: [PATCH 18/22] chore: add logging when proto serialization fails (#80) --- .../bigquery_streaming_v2/src/destination.py | 10 +++- .../test_bigquery_streaming_v2.py | 57 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index ab706f5..00cf9dc 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -22,7 +22,7 @@ ProtoSchema, ) from google.protobuf.json_format import MessageToDict, ParseDict -from google.protobuf.message import Message +from google.protobuf.message import EncodeError, Message from loguru import logger from requests.exceptions import ConnectionError, SSLError, Timeout from tenacity import ( @@ -190,7 +190,13 @@ def safe_cast_record_values(self, row: dict): def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes: """Convert a row to a Protobuf serialization.""" record = ParseDict(row, TableRowClass()) - return record.SerializeToString() + + try: + serialized_record = record.SerializeToString() + except EncodeError as e: + logger.error(f"Error serializing record: {e} for row: {row}.") + raise e + return serialized_record @staticmethod def from_protobuf_serialization( diff --git a/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py b/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py new file mode 100644 index 0000000..1c98f87 --- /dev/null +++ b/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py @@ -0,0 +1,57 @@ +import pytest +from google.cloud.bigquery import SchemaField +from google.protobuf.message import EncodeError + +from bizon.connectors.destinations.bigquery_streaming_v2.src.destination import ( + BigQueryStreamingV2Destination, +) +from bizon.connectors.destinations.bigquery_streaming_v2.src.proto_utils import ( + get_proto_schema_and_class, +) + + +def test_get_proto_schema_and_class(): + bq_schema = [ + SchemaField(name="name", field_type="STRING", mode="REQUIRED"), + SchemaField(name="age", 
field_type="INTEGER", mode="REQUIRED"), + ] + proto_schema, table_row_class = get_proto_schema_and_class(bq_schema) + assert proto_schema is not None + + +def test_to_protobuf_serialization(): + # Test to_protobuf_serialization + bq_schema = [ + SchemaField(name="name", field_type="STRING", mode="REQUIRED"), + SchemaField(name="age", field_type="INTEGER", mode="REQUIRED"), + ] + + proto_schema, table_row_class = get_proto_schema_and_class(bq_schema) + + data = { + "name": "John", + "age": 30, + } + + serialized_record = BigQueryStreamingV2Destination.to_protobuf_serialization(table_row_class, data) + + assert serialized_record is not None + + +def test_to_protobuf_serialization_error_mismatch_schema(): + # Test to_protobuf_serialization + bq_schema = [ + SchemaField(name="name", field_type="STRING", mode="REQUIRED"), + SchemaField(name="age", field_type="INTEGER", mode="REQUIRED"), + SchemaField(name="email", field_type="STRING", mode="REQUIRED"), + ] + + proto_schema, table_row_class = get_proto_schema_and_class(bq_schema) + + data = { + "name": "John", + "age": 30, + } + + with pytest.raises(EncodeError): + BigQueryStreamingV2Destination.to_protobuf_serialization(table_row_class, data) From af28219384d046bbbdf9822d5b2037b2642e1c29 Mon Sep 17 00:00:00 2001 From: Antoine Balliet Date: Thu, 24 Jul 2025 18:12:42 +0200 Subject: [PATCH 19/22] chore: add more logs when proto serialization fails (#81) --- .../bigquery_streaming_v2/src/destination.py | 8 ++++++-- .../test_bigquery_streaming_v2.py | 20 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index 00cf9dc..06917c5 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -21,7 +21,7 @@ ProtoRows, ProtoSchema, ) -from google.protobuf.json_format import MessageToDict, ParseDict +from google.protobuf.json_format import MessageToDict, ParseDict, ParseError from google.protobuf.message import EncodeError, Message from loguru import logger from requests.exceptions import ConnectionError, SSLError, Timeout @@ -189,7 +189,11 @@ def safe_cast_record_values(self, row: dict): @staticmethod def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes: """Convert a row to a Protobuf serialization.""" - record = ParseDict(row, TableRowClass()) + try: + record = ParseDict(row, TableRowClass()) + except ParseError as e: + logger.error(f"Error serializing record: {e} for row: {row}.") + raise e try: serialized_record = record.SerializeToString() diff --git a/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py b/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py index 1c98f87..fd47637 100644 --- a/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py +++ b/tests/connectors/destinations/bigquery_streaming_v2/test_bigquery_streaming_v2.py @@ -1,5 +1,6 @@ import pytest from google.cloud.bigquery import SchemaField +from google.protobuf.json_format import ParseError from google.protobuf.message import EncodeError from bizon.connectors.destinations.bigquery_streaming_v2.src.destination import ( @@ -55,3 +56,22 @@ def test_to_protobuf_serialization_error_mismatch_schema(): with pytest.raises(EncodeError): BigQueryStreamingV2Destination.to_protobuf_serialization(table_row_class, 
data) + + +def test_to_protobuf_serialization_error_mismatch_schema_parse_error(): + # Test to_protobuf_serialization + bq_schema = [ + SchemaField(name="name", field_type="STRING", mode="REQUIRED"), + SchemaField(name="email", field_type="STRING", mode="REQUIRED"), + ] + + proto_schema, table_row_class = get_proto_schema_and_class(bq_schema) + + data = { + "not_in_schema": "John", + "name": "John", + "age": 30, + } + + with pytest.raises(ParseError): + BigQueryStreamingV2Destination.to_protobuf_serialization(table_row_class, data) From 7dbaac2aef56872f157602ea8ced87c6d9b09c31 Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Thu, 31 Jul 2025 13:28:55 +0200 Subject: [PATCH 20/22] chore: catch empty project_id error (#84) --- .../bigquery_streaming_v2/src/destination.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index 06917c5..a1b7cc5 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -152,8 +152,13 @@ def append_rows_to_stream( writer_schema=proto_schema, ), ) - response = write_client.append_rows(iter([request])) - return response.code().name + try: + response = write_client.append_rows(iter([request])) + return response.code().name + except Exception as e: + logger.error(f"Error in append_rows_to_stream: {str(e)}") + logger.error(f"Stream name: {stream_name}") + raise def safe_cast_record_values(self, row: dict): """ @@ -315,7 +320,7 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - # Create the stream if self.destination_id: project, dataset, table_name = self.destination_id.split(".") - write_client = bigquery_storage_v1.BigQueryWriteClient() + write_client = self.bq_storage_client parent = write_client.table_path(project, dataset, table_name) else: write_client = self.bq_storage_client From c0bc8343e747db9988530fb62548787246e322dd Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Wed, 6 Aug 2025 18:11:28 +0200 Subject: [PATCH 21/22] chore: re-init client in append rows --- .../bigquery_streaming_v2/src/destination.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py index a1b7cc5..6210967 100644 --- a/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +++ b/bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py @@ -7,6 +7,7 @@ import orjson import polars as pl import urllib3.exceptions +from google.api_core.client_options import ClientOptions from google.api_core.exceptions import ( Conflict, NotFound, @@ -67,10 +68,13 @@ def __init__( self.project_id = config.project_id self.bq_client = bigquery.Client(project=self.project_id) - self.bq_storage_client = bigquery_storage_v1.BigQueryWriteClient() self.dataset_id = config.dataset_id self.dataset_location = config.dataset_location self.bq_max_rows_per_request = config.bq_max_rows_per_request + self.bq_storage_client_options = ClientOptions( + quota_project_id=self.project_id, + ) + self.bq_storage_client = bigquery_storage_v1.BigQueryWriteClient(client_options=self.bq_storage_client_options) @property def table_id(self) -> str: @@ -140,11 +144,12 @@ def check_connection(self) -> bool: ) def 
append_rows_to_stream( self, - write_client: bigquery_storage_v1.BigQueryWriteClient, stream_name: str, proto_schema: ProtoSchema, serialized_rows: List[bytes], ): + write_client = bigquery_storage_v1.BigQueryWriteClient(client_options=self.bq_storage_client_options) + request = AppendRowsRequest( write_stream=stream_name, proto_rows=AppendRowsRequest.ProtoData( @@ -238,7 +243,6 @@ def from_protobuf_serialization( ) def process_streaming_batch( self, - write_client: bigquery_storage_v1.BigQueryWriteClient, stream_name: str, proto_schema: ProtoSchema, batch: dict, @@ -249,7 +253,7 @@ def process_streaming_batch( try: # Handle streaming batch if batch.get("stream_batch") and len(batch["stream_batch"]) > 0: - result = self.append_rows_to_stream(write_client, stream_name, proto_schema, batch["stream_batch"]) + result = self.append_rows_to_stream(stream_name, proto_schema, batch["stream_batch"]) results.append(("streaming", result)) # Handle large rows batch @@ -374,9 +378,7 @@ def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) - with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all batch processing tasks future_to_batch = { - executor.submit( - self.process_streaming_batch, write_client, stream_name, proto_schema, batch, TableRow - ): batch + executor.submit(self.process_streaming_batch, stream_name, proto_schema, batch, TableRow): batch for batch in batches } From ae839607a94f14cb1f6af4cc1720eac6a03ddc7a Mon Sep 17 00:00:00 2001 From: Anas El Mhamdi Date: Wed, 6 Aug 2025 18:22:02 +0200 Subject: [PATCH 22/22] chore: add kafka error handling, bizon api quota config --- bizon/connectors/sources/kafka/src/source.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bizon/connectors/sources/kafka/src/source.py b/bizon/connectors/sources/kafka/src/source.py index e1a7540..769cd1a 100644 --- a/bizon/connectors/sources/kafka/src/source.py +++ b/bizon/connectors/sources/kafka/src/source.py @@ -12,6 +12,7 @@ Message, TopicPartition, ) +from confluent_kafka.cimpl import KafkaException as CimplKafkaException from loguru import logger from pydantic import BaseModel from pytz import UTC @@ -379,4 +380,9 @@ def get(self, pagination: dict = None) -> SourceIteration: def commit(self): """Commit the offsets of the consumer""" - self.consumer.commit(asynchronous=False) + try: + self.consumer.commit(asynchronous=False) + except CimplKafkaException as e: + logger.error(f"Kafka exception occurred during commit: {e}") + logger.info("Gracefully exiting without committing offsets due to Kafka exception") + return
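
To round off the BigQuery streaming changes in patches 15 through 17, below is a self-contained sketch of the batching rule they converge on: close the current append_rows batch whenever adding a row would exceed either the configured bq_max_rows_per_request or the request-size ceiling. This is an illustration under those assumptions, not the connectors' actual batch() method; the default limits mirror the values set in this series.

from typing import Iterable, Iterator, List

# Mirrors the 9.5 MB request ceiling used by bigquery_streaming_v2 (BigQuery caps requests at 10 MB)
MAX_REQUEST_SIZE_BYTES = int(9.5 * 1024 * 1024)


def batch_rows(
    serialized_rows: Iterable[bytes],
    max_rows_per_request: int = 5000,  # default bq_max_rows_per_request, capped at 10000
    max_request_size: int = MAX_REQUEST_SIZE_BYTES,
) -> Iterator[List[bytes]]:
    """Yield lists of serialized rows bounded by both row count and total byte size."""
    current_batch: List[bytes] = []
    current_size = 0

    for row in serialized_rows:
        row_size = len(row)
        # Close the current batch if appending this row would break either limit
        if current_batch and (
            len(current_batch) >= max_rows_per_request
            or current_size + row_size > max_request_size
        ):
            yield current_batch
            current_batch, current_size = [], 0
        # In the actual connectors, rows larger than MAX_ROW_SIZE_BYTES are diverted
        # to a separate large-rows path instead of being appended here.
        current_batch.append(row)
        current_size += row_size

    if current_batch:
        yield current_batch

For example, list(batch_rows([b"x" * 1024] * 12000)) yields three batches of 5000, 5000 and 2000 rows, since the row-count limit is reached well before the 9.5 MB size limit.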