diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py
index fb71254c0..5ea40ffb8 100644
--- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py
+++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py
@@ -90,10 +90,20 @@ def load(self, data):
         """Load transformed data into Parquet format"""
         self.logger.info(f"Loading data into {self.target_path} as Parquet")
         for name, dataframe in data.items():
+            # Skip frames that have no fully populated row. head(1) stops at
+            # the first surviving row, so the frame is not counted in full
+            # just to test it for emptiness.
+            # NOTE(review): na.drop() defaults to how="any", so a frame in
+            # which every row has at least one null is skipped entirely,
+            # while a frame with a single complete row is written whole,
+            # null-bearing rows included. Confirm that is the intent.
+            if not dataframe.na.drop().head(1):
+                self.logger.info(f"Dataframe {name} is null, skipping")
+                continue
             name = name.replace("--", "_")
             dataframe.write.mode("append").partitionBy(*self.partition_cols).parquet(
                 f"{self.target_path}{name}"
             )
 
     def trigger_crawler(self):
         self.glue.start_crawler(Name=f"{self.name_prefix}-log-crawler")