From d419eb31f0449b5893739391047cf1af013cc6e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:57:51 -0700 Subject: [PATCH 1/3] feat(optimizer): add repositories and repository tests Spring Data JPA repositories for all four optimizer tables with filtered query support. Includes tests exercising save/find, filtered queries, upsert semantics, and append-only history. Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 60 ++++++ .../repository/TableOperationsRepository.java | 33 +++ .../TableStatsHistoryRepository.java | 41 ++++ .../repository/TableStatsRepository.java | 25 +++ .../OptimizerServiceContextTest.java | 19 ++ .../TableOperationsHistoryRepositoryTest.java | 189 ++++++++++++++++++ .../TableOperationsRepositoryTest.java | 135 +++++++++++++ .../TableStatsHistoryRepositoryTest.java | 127 ++++++++++++ .../repository/TableStatsRepositoryTest.java | 141 +++++++++++++ 9 files changed, 770 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java new file mode 100644 index 000000000..2ba5bdf7a --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -0,0 +1,60 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is auto-increment {@code id}. + */ +@Repository +public interface TableOperationsHistoryRepository + extends JpaRepository { + + /** + * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. + * + * @param tableUuid the stable table UUID + * @param limit maximum number of rows to return + * @return history rows ordered by {@code submitted_at} descending + */ + @Query( + value = + "SELECT * FROM table_operations_history " + + "WHERE table_uuid = :tableUuid " + + "ORDER BY submitted_at DESC LIMIT :limit", + nativeQuery = true) + List find( + @Param("tableUuid") String tableUuid, @Param("limit") int limit); + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. + */ + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "AND (:until IS NULL OR r.submittedAt <= :until) " + + "ORDER BY r.submittedAt DESC") + List findFiltered( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + @Param("operationType") OperationType operationType, + @Param("status") OperationHistoryStatus status, + @Param("since") Instant since, + @Param("until") Instant until, + Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..69476991f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. */ +@Repository +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. + */ + @Query( + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..c6ec3befd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. + * + * @param tableUuid the stable table UUID + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.recordedAt DESC") + List findByTableUuid( + @Param("tableUuid") String tableUuid, Pageable pageable); + + /** + * Return history rows for a table recorded at or after {@code since}, newest first. + * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND r.recordedAt >= :since " + + "ORDER BY r.recordedAt DESC") + List findByTableUuidSince( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6c071cf5b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. + */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java new file mode 100644 index 000000000..abb89ec42 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -0,0 +1,19 @@ +package com.linkedin.openhouse.optimizer; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; + +/** + * Validates that the Spring application context loads successfully against the H2 schema. This test + * exercises schema-SQL-init, JPA entity scanning, and repository wiring. + */ +@SpringBootTest +@ActiveProfiles("test") +class OptimizerServiceContextTest { + + @Test + void contextLoads() { + // Context load is the assertion — no additional assertions needed. + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java new file mode 100644 index 000000000..9bde34334 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -0,0 +1,189 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsHistoryRepositoryTest { + + @Autowired TableOperationsHistoryRepository repository; + + @Test + void appendAndFindByTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t1) + .status(OperationHistoryStatus.SUCCESS) + .jobId("job-001") + .build()); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t2) + .status(OperationHistoryStatus.FAILED) + .jobId("job-002") + .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .build()); + + List rows = repository.find(tableUuid, 10); + + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); + assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + } + + @Test + void appendIsNonDestructive_multipleRunsRetained() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 3; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 10); + assertThat(rows).hasSize(3); + } + + @Test + void find_respectsLimit() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 5; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl3") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 3); + assertThat(rows).hasSize(3); + } + + @Test + void findFiltered_noParams_returnsAll() { + Instant now = Instant.now(); + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid1) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid2) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(1)) + .status(OperationHistoryStatus.FAILED) + .build()); + + List rows = + repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + } + + @Test + void findFiltered_byStatusAndTimeWindow() { + Instant old = Instant.parse("2024-01-01T00:00:00Z"); + Instant recent = Instant.parse("2024-06-01T00:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(old) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(recent) + .status(OperationHistoryStatus.FAILED) + .build()); + + // Filter by status + List failed = + repository.findFiltered( + null, + null, + null, + null, + OperationHistoryStatus.FAILED, + null, + null, + PageRequest.of(0, 100)); + assertThat(failed).hasSize(1); + assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + + // Filter by time window + Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); + List afterCutoff = + repository.findFiltered(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + assertThat(afterCutoff).hasSize(1); + assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java new file mode 100644 index 000000000..d7b8ee0b8 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -0,0 +1,135 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsRepositoryTest { + + @Autowired TableOperationsRepository repository; + + @Test + void saveAndFindById() { + String id = UUID.randomUUID().toString(); + + TableOperationsRow row = + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + + repository.save(row); + + Optional found = repository.findById(id); + assertThat(found).isPresent(); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + } + + @Test + void findFiltered_noParams_returnsAll() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List rows = repository.findFiltered(null, null, null, null, null); + assertThat(rows).hasSize(2); + } + + @Test + void findFiltered_byStatus() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List pending = + repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + assertThat(pending).hasSize(1); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + + List scheduled = + repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + assertThat(scheduled).hasSize(1); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void findFiltered_byDatabaseAndTable() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java new file mode 100644 index 000000000..fb86762dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -0,0 +1,127 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableStatsHistoryRepositoryTest { + + @Autowired TableStatsHistoryRepository repository; + + @Test + void saveAndFindByTableUuid() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + + assertThat(rows).hasSize(3); + // newest first + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + } + + @Test + void findByTableUuid_respectsLimit() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + for (int i = 0; i < 5; i++) { + repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); + } + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + + assertThat(rows).hasSize(3); + } + + @Test + void findByTableUuidSince_filtersOlderRows() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + Instant cutoff = now.minus(90, ChronoUnit.MINUTES); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = + repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + + // only the 2 rows within the last 90 minutes + assertThat(rows).hasSize(2); + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + } + + @Test + void findByTableUuid_isolatesByTableUuid() { + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); + repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); + + assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + } + + @Test + void autoIncrementId() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + + assertThat(row1.getId()).isNotNull(); + assertThat(row2.getId()).isNotNull(); + assertThat(row2.getId()).isGreaterThan(row1.getId()); + } + + private static TableStatsHistoryRow buildRow( + String tableUuid, + String databaseId, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return TableStatsHistoryRow.builder() + .tableUuid(tableUuid) + .databaseId(databaseId) + .tableName(tableName) + .stats( + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder() + .clusterId("cl1") + .tableSizeBytes(1024L) + .build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .build()) + .build()) + .recordedAt(recordedAt) + .build(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java new file mode 100644 index 000000000..5efb49148 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableStatsRepositoryTest { + + @Autowired TableStatsRepository repository; + + @Test + void saveAndFindById() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) + .build(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .updatedAt(Instant.now()) + .build()); + + Optional found = repository.findById(tableUuid); + assertThat(found).isPresent(); + assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + } + + @Test + void upsert_overwritesPreviousStats() { + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findAll()).hasSize(1); + assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + .isEqualTo(200L); + } + + @Test + void findFiltered_noParams_returnsAll() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, null)).hasSize(2); + } + + @Test + void findFiltered_byDatabase() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered("db1", null, null)).hasSize(1); + assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + } +} From 7ff3b4360877580f395650223c19542849a5e1f7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 11:35:45 -0700 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20consolidate=20repo=20methods=20?= =?UTF-8?q?=E2=80=94=20single=20find=20with=20optional=20filters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review comments: rename findFiltered → find across all repos, remove redundant findByTableUuid/findByTableUuidSince from history repos, add explicit assertion to context test. Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 18 +-------------- .../repository/TableOperationsRepository.java | 2 +- .../TableStatsHistoryRepository.java | 22 +++++-------------- .../repository/TableStatsRepository.java | 2 +- .../OptimizerServiceContextTest.java | 8 ++++++- .../TableOperationsHistoryRepositoryTest.java | 19 +++++++++------- .../TableOperationsRepositoryTest.java | 18 +++++++-------- .../TableStatsHistoryRepositoryTest.java | 19 ++++++++-------- .../repository/TableStatsRepositoryTest.java | 10 ++++----- 9 files changed, 49 insertions(+), 69 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 2ba5bdf7a..71ab1cde4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,22 +18,6 @@ public interface TableOperationsHistoryRepository extends JpaRepository { - /** - * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. - * - * @param tableUuid the stable table UUID - * @param limit maximum number of rows to return - * @return history rows ordered by {@code submitted_at} descending - */ - @Query( - value = - "SELECT * FROM table_operations_history " - + "WHERE table_uuid = :tableUuid " - + "ORDER BY submitted_at DESC LIMIT :limit", - nativeQuery = true) - List find( - @Param("tableUuid") String tableUuid, @Param("limit") int limit); - /** * Return history rows matching the given filters, ordered by {@code submittedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. @@ -48,7 +32,7 @@ List find( + "AND (:since IS NULL OR r.submittedAt >= :since) " + "AND (:until IS NULL OR r.submittedAt <= :until) " + "ORDER BY r.submittedAt DESC") - List findFiltered( + List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 69476991f..891322134 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -24,7 +24,7 @@ public interface TableOperationsRepository extends JpaRepository findFiltered( + List find( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("databaseName") String databaseName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index c6ec3befd..767d60c22 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -12,30 +12,18 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. * * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip * @param pageable use {@code PageRequest.of(0, limit)} to cap results */ @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List findByTableUuid( - @Param("tableUuid") String tableUuid, Pageable pageable); - - /** - * Return history rows for a table recorded at or after {@code since}, newest first. - * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND r.recordedAt >= :since " - + "ORDER BY r.recordedAt DESC") - List findByTableUuidSince( + List find( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6c071cf5b..ecae70feb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -18,7 +18,7 @@ public interface TableStatsRepository extends JpaRepository findFiltered( + List find( @Param("databaseId") String databaseId, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java index abb89ec42..fa373c57d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -1,7 +1,11 @@ package com.linkedin.openhouse.optimizer; +import static org.assertj.core.api.Assertions.assertThat; + import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.ApplicationContext; import org.springframework.test.context.ActiveProfiles; /** @@ -12,8 +16,10 @@ @ActiveProfiles("test") class OptimizerServiceContextTest { + @Autowired ApplicationContext context; + @Test void contextLoads() { - // Context load is the assertion — no additional assertions needed. + assertThat(context).isNotNull(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 9bde34334..1a35a8fda 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -54,7 +54,8 @@ void appendAndFindByTableUuid() { .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) .build()); - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); // Newest first @@ -79,7 +80,8 @@ void appendIsNonDestructive_multipleRunsRetained() { .build()); } - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(3); } @@ -100,12 +102,13 @@ void find_respectsLimit() { .build()); } - List rows = repository.find(tableUuid, 3); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { Instant now = Instant.now(); String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); @@ -132,14 +135,14 @@ void findFiltered_noParams_returnsAll() { .build()); List rows = - repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); } @Test - void findFiltered_byStatusAndTimeWindow() { + void find_byStatusAndTimeWindow() { Instant old = Instant.parse("2024-01-01T00:00:00Z"); Instant recent = Instant.parse("2024-06-01T00:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -167,7 +170,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by status List failed = - repository.findFiltered( + repository.find( null, null, null, @@ -182,7 +185,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = - repository.findFiltered(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index d7b8ee0b8..b1342b12d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -45,7 +45,7 @@ void saveAndFindById() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -67,12 +67,12 @@ void findFiltered_noParams_returnsAll() { .createdAt(Instant.now()) .build()); - List rows = repository.findFiltered(null, null, null, null, null); + List rows = repository.find(null, null, null, null, null); assertThat(rows).hasSize(2); } @Test - void findFiltered_byStatus() { + void find_byStatus() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -95,18 +95,18 @@ void findFiltered_byStatus() { .build()); List pending = - repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findFiltered_byDatabaseAndTable() { + void find_byDatabaseAndTable() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -128,8 +128,8 @@ void findFiltered_byDatabaseAndTable() { .createdAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index fb86762dc..a76c7155d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -23,7 +23,7 @@ class TableStatsHistoryRepositoryTest { @Autowired TableStatsHistoryRepository repository; @Test - void saveAndFindByTableUuid() { + void saveAndFind() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -31,7 +31,7 @@ void saveAndFindByTableUuid() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -40,7 +40,7 @@ void saveAndFindByTableUuid() { } @Test - void findByTableUuid_respectsLimit() { + void find_respectsLimit() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -48,13 +48,13 @@ void findByTableUuid_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findByTableUuidSince_filtersOlderRows() { + void find_withSince_filtersOlderRows() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); Instant cutoff = now.minus(90, ChronoUnit.MINUTES); @@ -63,8 +63,7 @@ void findByTableUuidSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = - repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -72,7 +71,7 @@ void findByTableUuidSince_filtersOlderRows() { } @Test - void findByTableUuid_isolatesByTableUuid() { + void find_isolatesByTableUuid() { String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -80,8 +79,8 @@ void findByTableUuid_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5efb49148..a8ac1cbbb 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -83,7 +83,7 @@ void upsert_overwritesPreviousStats() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) @@ -107,11 +107,11 @@ void findFiltered_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, null)).hasSize(2); + assertThat(repository.find(null, null, null)).hasSize(2); } @Test - void findFiltered_byDatabase() { + void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) @@ -135,7 +135,7 @@ void findFiltered_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered("db1", null, null)).hasSize(1); - assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null)).hasSize(1); + assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); } } From ac1da013711ca3ac680bb24e48f3859813f099a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:09:53 -0700 Subject: [PATCH 3/3] feat(optimizer): add apps/optimizer shared module with find-only repos Shared JPA entities and repositories for optimizer apps (analyzer, scheduler). All repos expose a single find method with optional filters. Co-Authored-By: Claude Opus 4.6 --- apps/optimizer/build.gradle | 13 +++++ .../entity/TableOperationHistoryRow.java | 37 +++++++++++++ .../optimizer/entity/TableOperationRow.java | 55 +++++++++++++++++++ .../optimizer/entity/TableStatsRow.java | 53 ++++++++++++++++++ .../openhouse/optimizer/model/TableStats.java | 45 +++++++++++++++ .../TableOperationHistoryRepository.java | 32 +++++++++++ .../repository/TableOperationsRepository.java | 29 ++++++++++ .../repository/TableStatsRepository.java | 25 +++++++++ settings.gradle | 1 + 9 files changed, 290 insertions(+) create mode 100644 apps/optimizer/build.gradle create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. */ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. */ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..f2ea9e3c8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. + */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("operationType") String operationType, + @Param("tableUuid") String tableUuid, + @Param("status") String status, + @Param("since") Instant since, + Pageable pageable); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..27424dfdc --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. + */ + @Query( + "SELECT r FROM TableOperationRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName)") + List find( + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6effe19c2 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. + */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/settings.gradle b/settings.gradle index cad06785e..0d64dad53 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,6 +50,7 @@ include ':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' +include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5'