diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the 
optimizer {@code table_stats} table. Written by the Tables Service on every + Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map<String, String> tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..f2ea9e3c8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("operationType") String operationType, + @Param("tableUuid") String tableUuid, + @Param("status") String status, + @Param("since") Instant since, + Pageable pageable); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..27424dfdc --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName)") + List find( + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6effe19c2 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java new file mode 100644 index 000000000..71ab1cde4 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -0,0 +1,44 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is auto-increment {@code id}. + */ +@Repository +public interface TableOperationsHistoryRepository + extends JpaRepository { + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "AND (:until IS NULL OR r.submittedAt <= :until) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + @Param("operationType") OperationType operationType, + @Param("status") OperationHistoryStatus status, + @Param("since") Instant since, + @Param("until") Instant until, + Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..891322134 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. 
*/ +@Repository +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. + */ + @Query( + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..767d60c22 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..ecae70feb --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java new file mode 100644 index 000000000..fa373c57d --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.ApplicationContext; +import org.springframework.test.context.ActiveProfiles; + +/** + * Validates that the Spring application context loads successfully against the H2 schema. This test + * exercises schema-SQL-init, JPA entity scanning, and repository wiring. 
+ */ +@SpringBootTest +@ActiveProfiles("test") +class OptimizerServiceContextTest { + + @Autowired ApplicationContext context; + + @Test + void contextLoads() { + assertThat(context).isNotNull(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java new file mode 100644 index 000000000..1a35a8fda --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -0,0 +1,192 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsHistoryRepositoryTest { + + @Autowired TableOperationsHistoryRepository repository; + + @Test + void appendAndFindByTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + 
.tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t1) + .status(OperationHistoryStatus.SUCCESS) + .jobId("job-001") + .build()); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t2) + .status(OperationHistoryStatus.FAILED) + .jobId("job-002") + .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .build()); + + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); + + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); + assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + } + + @Test + void appendIsNonDestructive_multipleRunsRetained() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 3; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); + assertThat(rows).hasSize(3); + } + + @Test + void find_respectsLimit() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 5; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl3") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = + repository.find(null, null, tableUuid, null, 
null, null, null, PageRequest.of(0, 3)); + assertThat(rows).hasSize(3); + } + + @Test + void find_noParams_returnsAll() { + Instant now = Instant.now(); + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid1) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid2) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(1)) + .status(OperationHistoryStatus.FAILED) + .build()); + + List rows = + repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + } + + @Test + void find_byStatusAndTimeWindow() { + Instant old = Instant.parse("2024-01-01T00:00:00Z"); + Instant recent = Instant.parse("2024-06-01T00:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(old) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(recent) + .status(OperationHistoryStatus.FAILED) + .build()); + + // Filter by status + List failed = + repository.find( + null, + null, + null, + null, + OperationHistoryStatus.FAILED, + null, + 
null, + PageRequest.of(0, 100)); + assertThat(failed).hasSize(1); + assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + + // Filter by time window + Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); + List afterCutoff = + repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + assertThat(afterCutoff).hasSize(1); + assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java new file mode 100644 index 000000000..b1342b12d --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -0,0 +1,135 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsRepositoryTest { + + @Autowired TableOperationsRepository repository; + + @Test + void saveAndFindById() { + String id = UUID.randomUUID().toString(); + + TableOperationsRow row = + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + 
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.PENDING)
+            .createdAt(Instant.now())
+            .build();
+
+    repository.save(row);
+
+    Optional<TableOperationsRow> found = repository.findById(id);
+    assertThat(found).isPresent();
+    assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING);
+  }
+
+  @Test
+  void find_noParams_returnsAll() {
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db1")
+            .tableName("tbl1")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.PENDING)
+            .createdAt(Instant.now())
+            .build());
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db1")
+            .tableName("tbl2")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.SCHEDULED)
+            .createdAt(Instant.now())
+            .build());
+
+    List<TableOperationsRow> rows = repository.find(null, null, null, null, null);
+    assertThat(rows).hasSize(2);
+  }
+
+  @Test
+  void find_byStatus() {
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db1")
+            .tableName("tbl1")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.PENDING)
+            .createdAt(Instant.now())
+            .build());
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db1")
+            .tableName("tbl2")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.SCHEDULED)
+            .createdAt(Instant.now())
+            .build());
+
+    List<TableOperationsRow> pending =
+        repository.find(null, OperationStatus.PENDING, null, null, null);
+    assertThat(pending).hasSize(1);
+    assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING);
+
+    List<TableOperationsRow> scheduled =
+        repository.find(null, OperationStatus.SCHEDULED, null, null, null);
+    assertThat(scheduled).hasSize(1);
+    assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED);
+  }
+
+  @Test
+  void find_byDatabaseAndTable() {
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db1")
+            .tableName("tbl1")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.PENDING)
+            .createdAt(Instant.now())
+            .build());
+    repository.save(
+        TableOperationsRow.builder()
+            .id(UUID.randomUUID().toString())
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseName("db2")
+            .tableName("tbl2")
+            .operationType(OperationType.ORPHAN_FILES_DELETION)
+            .status(OperationStatus.PENDING)
+            .createdAt(Instant.now())
+            .build());
+
+    assertThat(repository.find(null, null, "db1", null, null)).hasSize(1);
+    assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1);
+    assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty();
+  }
+}
diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java
new file mode 100644
index 000000000..a76c7155d
--- /dev/null
+++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java
@@ -0,0 +1,126 @@
+package com.linkedin.openhouse.optimizer.repository;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.linkedin.openhouse.optimizer.api.model.TableStats;
+import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
+import java.util.List;
+import java.util.UUID;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.test.context.ActiveProfiles;
+import org.springframework.transaction.annotation.Transactional;
+
+@SpringBootTest
+@ActiveProfiles("test")
+@Transactional
+class TableStatsHistoryRepositoryTest {
+
+  @Autowired TableStatsHistoryRepository repository;
+
+  @Test
+  void saveAndFind() {
+    String tableUuid = UUID.randomUUID().toString();
+    Instant now = Instant.now();
+
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS)));
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS)));
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now));
+
+    List<TableStatsHistoryRow> rows = repository.find(tableUuid, null, PageRequest.of(0, 100));
+
+    assertThat(rows).hasSize(3);
+    // newest first
+    assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L);
+    assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L);
+  }
+
+  @Test
+  void find_respectsLimit() {
+    String tableUuid = UUID.randomUUID().toString();
+    Instant now = Instant.now();
+
+    for (int i = 0; i < 5; i++) {
+      repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS)));
+    }
+
+    List<TableStatsHistoryRow> rows = repository.find(tableUuid, null, PageRequest.of(0, 3));
+
+    assertThat(rows).hasSize(3);
+  }
+
+  @Test
+  void find_withSince_filtersOlderRows() {
+    String tableUuid = UUID.randomUUID().toString();
+    Instant now = Instant.now();
+    Instant cutoff = now.minus(90, ChronoUnit.MINUTES);
+
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS)));
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS)));
+    repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now));
+
+    List<TableStatsHistoryRow> rows = repository.find(tableUuid, cutoff, PageRequest.of(0, 100));
+
+    // only the 2 rows within the last 90 minutes
+    assertThat(rows).hasSize(2);
+    assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L);
+  }
+
+  @Test
+  void find_isolatesByTableUuid() {
+    String uuid1 = UUID.randomUUID().toString();
+    String uuid2 = UUID.randomUUID().toString();
+    Instant now = Instant.now();
+
+    repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now));
+    repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now));
+
+    assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1);
+    assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1);
+  }
+
+  @Test
+  void autoIncrementId() {
+    String tableUuid = UUID.randomUUID().toString();
+    Instant now = Instant.now();
+
+    TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now));
+    TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now));
+
+    assertThat(row1.getId()).isNotNull();
+    assertThat(row2.getId()).isNotNull();
+    assertThat(row2.getId()).isGreaterThan(row1.getId());
+  }
+
+  private static TableStatsHistoryRow buildRow(
+      String tableUuid,
+      String databaseId,
+      String tableName,
+      long numFilesAdded,
+      long numFilesDeleted,
+      Instant recordedAt) {
+    return TableStatsHistoryRow.builder()
+        .tableUuid(tableUuid)
+        .databaseId(databaseId)
+        .tableName(tableName)
+        .stats(
+            TableStats.builder()
+                .snapshot(
+                    TableStats.SnapshotMetrics.builder()
+                        .clusterId("cl1")
+                        .tableSizeBytes(1024L)
+                        .build())
+                .delta(
+                    TableStats.CommitDelta.builder()
+                        .numFilesAdded(numFilesAdded)
+                        .numFilesDeleted(numFilesDeleted)
+                        .build())
+                .build())
+        .recordedAt(recordedAt)
+        .build();
+  }
+}
diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java
new file mode 100644
index 000000000..a8ac1cbbb
--- /dev/null
+++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java
@@ -0,0 +1,141 @@
+package com.linkedin.openhouse.optimizer.repository;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.linkedin.openhouse.optimizer.api.model.TableStats;
+import com.linkedin.openhouse.optimizer.entity.TableStatsRow;
+import java.time.Instant;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.ActiveProfiles;
+import org.springframework.transaction.annotation.Transactional;
+
+@SpringBootTest
+@ActiveProfiles("test")
+@Transactional
+class TableStatsRepositoryTest {
+
+  @Autowired TableStatsRepository repository;
+
+  @Test
+  void saveAndFindById() {
+    String tableUuid = UUID.randomUUID().toString();
+    TableStats stats =
+        TableStats.builder()
+            .snapshot(
+                TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build())
+            .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build())
+            .build();
+
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(tableUuid)
+            .databaseId("db1")
+            .tableName("tbl1")
+            .stats(stats)
+            .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true"))
+            .updatedAt(Instant.now())
+            .build());
+
+    Optional<TableStatsRow> found = repository.findById(tableUuid);
+    assertThat(found).isPresent();
+    assertThat(found.get().getDatabaseId()).isEqualTo("db1");
+    assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L);
+    assertThat(found.get().getTableProperties())
+        .containsEntry("maintenance.optimizer.ofd.enabled", "true");
+  }
+
+  @Test
+  void upsert_overwritesPreviousStats() {
+    String tableUuid = UUID.randomUUID().toString();
+
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(tableUuid)
+            .databaseId("db1")
+            .tableName("tbl1")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(tableUuid)
+            .databaseId("db1")
+            .tableName("tbl1")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+
+    assertThat(repository.findAll()).hasSize(1);
+    assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes())
+        .isEqualTo(200L);
+  }
+
+  @Test
+  void find_noParams_returnsAll() {
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseId("db1")
+            .tableName("tbl1")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseId("db2")
+            .tableName("tbl2")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+
+    assertThat(repository.find(null, null, null)).hasSize(2);
+  }
+
+  @Test
+  void find_byDatabase() {
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseId("db1")
+            .tableName("tbl1")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+    repository.save(
+        TableStatsRow.builder()
+            .tableUuid(UUID.randomUUID().toString())
+            .databaseId("db2")
+            .tableName("tbl2")
+            .stats(
+                TableStats.builder()
+                    .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build())
+                    .build())
+            .updatedAt(Instant.now())
+            .build());
+
+    assertThat(repository.find("db1", null, null)).hasSize(1);
+    assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1");
+  }
+}
diff --git a/settings.gradle b/settings.gradle
index cad06785e..0d64dad53 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -50,6 +50,7 @@ include ':services:common'
 include ':services:housetables'
 include ':services:jobs'
 include ':services:optimizer'
+include ':apps:optimizer'
 include ':services:tables'
 include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2'
 include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5'