From 2119555c2fe7748f704dfdf245fb32921349ba52 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:00:58 -0700 Subject: [PATCH 01/55] =?UTF-8?q?feat(optimizer):=20add=20data=20model=20?= =?UTF-8?q?=E2=80=94=20schema,=20entities,=20DTOs,=20converters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the optimizer service module with: - MySQL/H2 schema for table_operations, table_stats, table_stats_history, and table_operations_history - JPA entities with JSON column support (vladmihalcea hibernate-types) - All model/DTO/enum types: OperationType, OperationStatus, TableStats, CompleteOperationRequest, JobResult, OperationMetrics, etc. - JPA AttributeConverters for JobResult and OperationMetrics JSON columns - MapStruct mapper (OptimizerMapper) for entity→DTO conversion - Spring Boot application shell and build wiring (settings.gradle, build.gradle dockerPrereqs) No repositories, controllers, or service layer yet — those follow in subsequent PRs. 
Co-Authored-By: Claude Opus 4.6 --- build.gradle | 3 + services/optimizer/build.gradle | 17 ++++ .../OptimizerServiceApplication.java | 13 +++ .../optimizer/api/mapper/OptimizerMapper.java | 32 ++++++ .../api/model/CompleteOperationRequest.java | 31 ++++++ .../optimizer/api/model/JobResult.java | 25 +++++ .../api/model/OperationHistoryStatus.java | 7 ++ .../optimizer/api/model/OperationMetrics.java | 24 +++++ .../optimizer/api/model/OperationStatus.java | 21 ++++ .../optimizer/api/model/OperationType.java | 12 +++ .../api/model/TableOperationsDto.java | 40 ++++++++ .../api/model/TableOperationsHistoryDto.java | 43 ++++++++ .../optimizer/api/model/TableStats.java | 48 +++++++++ .../optimizer/api/model/TableStatsDto.java | 23 +++++ .../api/model/TableStatsHistoryDto.java | 22 +++++ .../model/UpsertTableOperationsRequest.java | 26 +++++ .../api/model/UpsertTableStatsRequest.java | 25 +++++ .../optimizer/config/JobResultConverter.java | 39 ++++++++ .../config/OperationMetricsConverter.java | 44 +++++++++ .../entity/TableOperationsHistoryRow.java | 91 +++++++++++++++++ .../optimizer/entity/TableOperationsRow.java | 99 +++++++++++++++++++ .../entity/TableStatsHistoryRow.java | 64 ++++++++++++ .../optimizer/entity/TableStatsRow.java | 57 +++++++++++ .../optimizer/entity/package-info.java | 2 + .../src/main/resources/application.properties | 20 ++++ .../main/resources/db/optimizer-schema.sql | 53 ++++++++++ .../resources/application-test.properties | 12 +++ settings.gradle | 1 + 28 files changed, 894 insertions(+) create mode 100644 services/optimizer/build.gradle create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java create mode 100644 services/optimizer/src/main/resources/application.properties create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/build.gradle b/build.gradle index 4699ca592..4cfac4a5d 100644 --- a/build.gradle +++ b/build.gradle @@ -157,6 +157,7 @@ tasks.register('CopyGitHooksTask', Copy) { // tables-service.Dockerfile -> :services:tables:bootJar // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar +// optimizer-service.Dockerfile -> :services:optimizer:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -176,6 +177,7 @@ tasks.register('dockerPrereqs') { dependsOn ':services:tables:bootJar' dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' + dependsOn ':services:optimizer:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -196,6 +198,7 @@ tasks.register('dockerPrereqs') { println ' build/tables/libs/tables.jar' println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' + println ' build/optimizer/libs/optimizer.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' 
build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle new file mode 100644 index 000000000..c05c7f9c3 --- /dev/null +++ b/services/optimizer/build.gradle @@ -0,0 +1,17 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' +} + +test { + useJUnitPlatform() +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java new file mode 100644 index 000000000..38eb363a8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +/** Spring Boot entry point for the Optimizer Service. 
*/ +@SpringBootApplication +public class OptimizerServiceApplication { + + public static void main(String[] args) { + SpringApplication.run(OptimizerServiceApplication.class, args); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java new file mode 100644 index 000000000..8c0b17462 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.api.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import org.mapstruct.Mapper; + +/** + * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. + * + *

<p>MapStruct generates the implementation at compile time as a Spring bean. Inject via {@code @Autowired} or constructor injection. + */ +@Mapper(componentModel = "spring") +public interface OptimizerMapper { + + /** Map a {@link TableOperationsRow} to its DTO. */ + TableOperationsDto toDto(TableOperationsRow row); + + /** Map a {@link TableOperationsHistoryRow} to its DTO. */ + TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); + + /** Map a {@link TableStatsRow} to its DTO. */ + TableStatsDto toDto(TableStatsRow row); + + /** Map a {@link TableStatsHistoryRow} to its DTO. */ + TableStatsHistoryDto toDto(TableStatsHistoryRow row); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java new file mode 100644 index 000000000..c26893197 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -0,0 +1,31 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code POST /v1/table-operations/{id}/complete}. + * + *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code + * id} and writes a history entry with the operation's table metadata and the supplied result. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class CompleteOperationRequest { + + /** Outcome of the operation. */ + private OperationHistoryStatus status; + + /** Error details on failure; {@code null} on success. */ + private JobResult result; + + /** Number of orphan files deleted; set by OFD Spark app on success. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java new file mode 100644 index 000000000..74942243c --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Result payload for a completed Spark maintenance job. + * + *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields + * are {@code null} on success; populated on failure. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java new file mode 100644 index 000000000..791d910a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Terminal states for a completed Spark maintenance job. */ +public enum OperationHistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java new file mode 100644 index 000000000..d6f788fcc --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Denormalized stats snapshot captured by the Analyzer at analysis time. + * + *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are + * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, + * not cumulative totals. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class OperationMetrics { + + private Long tableSizeBytes; + private Integer numFilesAdded; + private Integer numFilesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java new file mode 100644 index 000000000..c97be441b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Lifecycle states for a table operation recommendation. */ +public enum OperationStatus { + + /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ + PENDING, + + /** Claimed by the Scheduler; waiting for the Jobs Service to return a job ID. */ + SCHEDULING, + + /** Job submitted to the Jobs Service; the row now carries a {@code jobId}. */ + SCHEDULED, + + /** + * Marked by the Scheduler when it detects duplicate PENDING rows for the same {@code (table_uuid, + * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED + * before the claim step. 
+ */ + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java new file mode 100644 index 000000000..05e4a1e7b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** + * Maintenance operation types supported by the continuous optimizer. + * + *

<p>Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as + * they are built out. + */ +public enum OperationType { + /** Removes orphaned data files no longer referenced by table metadata. */ + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java new file mode 100644 index 000000000..5eb5eaaa6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -0,0 +1,40 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsDto { + + /** Client-generated UUID identifying this specific operation recommendation. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** One of the {@link OperationStatus} states. Defaults to {@code PENDING} on creation. */ + private OperationStatus status; + + /** Server-set when the row is first created by the Analyzer. */ + private Instant createdAt; + + /** Set by the Scheduler when claiming; {@code null} while PENDING. */ + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + private String jobId; + + /** Denormalized stats snapshot captured at analysis time. 
*/ + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java new file mode 100644 index 000000000..7dca34271 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations_history} — append-only Spark job results. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistoryDto { + + /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** When the Spark job was submitted / ran. */ + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + private OperationHistoryStatus status; + + /** Spark job ID. */ + private String jobId; + + /** Job result payload; both fields null on success. */ + private JobResult result; + + /** Number of orphan files deleted; null for non-OFD operations or before completion. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..cb77d994f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,48 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java new file mode 100644 index 000000000..1663d5ab0 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats} — used for response payloads. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsDto { + + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java new file mode 100644 index 000000000..142f00245 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -0,0 +1,22 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats_history} — used for response payloads. 
*/ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistoryDto { + + private Long id; + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java new file mode 100644 index 000000000..19dd1baac --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * PUT request body for {@code /v1/table-operations/{id}}. + * + *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all + * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, + * updates {@code metrics} on subsequent calls with the same {@code id}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableOperationsRequest { + + private String tableUuid; + private String databaseName; + private String tableName; + private OperationType operationType; + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java new file mode 100644 index 000000000..3214a85a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code PUT /v1/table-stats/{tableUuid}}. + * + *

{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + * denormalized display columns carried in the body. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableStatsRequest { + + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java new file mode 100644 index 000000000..4c9bfbe76 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java @@ -0,0 +1,39 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ +@Converter +public class JobResultConverter implements AttributeConverter<JobResult, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(JobResult attribute) { + if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } + + @Override + public JobResult convertToEntityAttribute(String dbData) { + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, JobResult.class); + } catch (IOException e) { + throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java new file mode 100644 index 000000000..27f0882f5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java @@ -0,0 +1,44 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** + * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. + */ +@Converter +public class OperationMetricsConverter implements AttributeConverter<OperationMetrics, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(OperationMetrics attribute) { + // Null metrics are valid for PENDING operations that have not yet produced output. 
+ if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); + } + } + + @Override + public OperationMetrics convertToEntityAttribute(String dbData) { + // Null is stored for PENDING rows; return null so the entity reflects that state. + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to deserialize OperationMetrics from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java new file mode 100644 index 000000000..6a47b5022 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.JobResultConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed Spark maintenance job. + * + *

Written by the Spark app after each table's operation finishes. The {@code id} is the same + * UUID as the originating {@code table_operations.id}, tying each history entry directly back to + * the specific operation cycle that produced it. Multiple runs of the same operation on the same + * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_job_id", columnList = "job_id") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + /** When the Spark job was submitted / ran, as reported by the job itself. */ + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationHistoryStatus status; + + /** Spark job ID; indexed for job → result lookups. 
*/ + @Column(name = "job_id", length = 255) + private String jobId; + + /** Job result: error details on failure, both fields null on success. */ + @Convert(converter = JobResultConverter.class) + @Column(name = "result") + private JobResult result; + + /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ + @Column(name = "orphan_files_deleted") + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. */ + @Column(name = "orphan_bytes_deleted") + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java new file mode 100644 index 000000000..9d835aa20 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -0,0 +1,99 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code + * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). + * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, + * operation_type)} at a time. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + /** Client-generated UUID identifying this specific operation recommendation. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} + * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + */ + @Column(name = "version") + private Long version; + + /** + * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file + * counts as of the moment the Analyzer ran. + */ + @Convert(converter = OperationMetricsConverter.class) + @Column(name = "metrics") + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..85d97a5eb --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,64 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import 
org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", nullable = false) + private Long id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..71d6a9421 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,57 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; 
+import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java new file mode 100644 index 000000000..7c0ca1f67 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java @@ -0,0 +1,2 @@ +/** JPA entities for the optimizer service. 
*/ +package com.linkedin.openhouse.optimizer.entity; diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..53062c5ad --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,53 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
+CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + metrics TEXT, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id BIGINT NOT NULL AUTO_INCREMENT, + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + submitted_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + job_id VARCHAR(255), + result TEXT, + orphan_files_deleted INT, + orphan_bytes_deleted BIGINT, + PRIMARY KEY (id) +); diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none 
+spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql diff --git a/settings.gradle b/settings.gradle index 035e54349..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -49,6 +49,7 @@ include ':libs:datalayout' include ':services:common' include ':services:housetables' include ':services:jobs' +include ':services:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 3c93d52f21ce82cc01ae37fef8ca5c1dba2522e1 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:35:45 -0700 Subject: [PATCH 02/55] fix: address PR review feedback on optimizer data model - Remove OperationMetrics class and converter; stats are read directly from table_stats instead of duplicating into operations - Remove orphanFilesDeleted/orphanBytesDeleted from history entity, DTO, and schema; operation-specific data belongs in the result JSON - Add addedSizeBytes to CommitDelta for tracking write volume - Fix OperationType javadoc to describe current state, not roadmap - Fix TableOperationsHistoryRow javadoc: written on operation complete, not by Spark app directly - Add field comments to all DTOs and request objects Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/OperationMetrics.java | 24 ---------- .../optimizer/api/model/OperationType.java | 7 +-- .../api/model/TableOperationsDto.java | 9 +++- .../api/model/TableOperationsHistoryDto.java | 12 ++--- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 11 +++++ .../api/model/TableStatsHistoryDto.java | 11 +++++ .../model/UpsertTableOperationsRequest.java | 11 +++-- 
.../api/model/UpsertTableStatsRequest.java | 7 +++ .../config/OperationMetricsConverter.java | 44 ------------------- .../entity/TableOperationsHistoryRow.java | 20 +++------ .../optimizer/entity/TableOperationsRow.java | 10 ++--- .../main/resources/db/optimizer-schema.sql | 2 - 13 files changed, 58 insertions(+), 111 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java deleted file mode 100644 index d6f788fcc..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Denormalized stats snapshot captured by the Analyzer at analysis time. - * - *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are - * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, - * not cumulative totals. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class OperationMetrics { - - private Long tableSizeBytes; - private Integer numFilesAdded; - private Integer numFilesDeleted; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 05e4a1e7b..8507bae12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -1,11 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; -/** - * Maintenance operation types supported by the continuous optimizer. - * - *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as - * they are built out. - */ +/** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 5eb5eaaa6..9c33d8907 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -19,8 +19,13 @@ public class TableOperationsDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display; not part of the primary key. */ private String databaseName; + + /** Denormalized table name for display; not part of the primary key. */ private String tableName; + + /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ @@ -35,6 +40,6 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - /** Denormalized stats snapshot captured at analysis time. */ - private OperationMetrics metrics; + /** Reserved for future per-operation metadata; currently unused. 
*/ + private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 7dca34271..efc9bebbb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -6,7 +6,7 @@ import lombok.Data; import lombok.NoArgsConstructor; -/** DTO for {@code table_operations_history} — append-only Spark job results. */ +/** DTO for {@code table_operations_history} — append-only operation results. */ @Data @Builder @NoArgsConstructor @@ -23,21 +23,15 @@ public class TableOperationsHistoryDto { private String tableName; private OperationType operationType; - /** When the Spark job was submitted / ran. */ + /** When the operation completed, as recorded by the complete endpoint. */ private Instant submittedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; - /** Spark job ID. */ + /** Job ID from the Jobs Service. */ private String jobId; /** Job result payload; both fields null on success. */ private JobResult result; - - /** Number of orphan files deleted; null for non-OFD operations or before completion. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index cb77d994f..51aa8a712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -43,6 +43,7 @@ public static class SnapshotMetrics { public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 1663d5ab0..a668af434 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -14,10 +14,21 @@ @AllArgsConstructor public class TableStatsDto { + /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload, stored as JSON. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** When this row was last written. Used for staleness monitoring. 
*/ private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 142f00245..0604e07de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,10 +13,21 @@ @AllArgsConstructor public class TableStatsHistoryDto { + /** Auto-increment primary key. */ private Long id; + + /** Stable Iceberg table UUID. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Snapshot + delta stats from this commit event. */ private TableStats stats; + + /** When this history row was recorded. */ private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java index 19dd1baac..21174c337 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -9,8 +9,7 @@ * PUT request body for {@code /v1/table-operations/{id}}. * *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, - * updates {@code metrics} on subsequent calls with the same {@code id}. + * table-identifying fields in this body. The service creates the row on first call. */ @Data @Builder @@ -18,9 +17,15 @@ @AllArgsConstructor public class UpsertTableOperationsRequest { + /** Stable Iceberg table UUID identifying the target table. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; + + /** The type of maintenance operation to create. */ private OperationType operationType; - private OperationMetrics metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 3214a85a6..721c3deaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -18,8 +18,15 @@ @AllArgsConstructor public class UpsertTableStatsRequest { + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload from this commit. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java deleted file mode 100644 index 27f0882f5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** - * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. - */ -@Converter -public class OperationMetricsConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(OperationMetrics attribute) { - // Null metrics are valid for PENDING operations that have not yet produced output. - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); - } - } - - @Override - public OperationMetrics convertToEntityAttribute(String dbData) { - // Null is stored for PENDING rows; return null so the entity reflects that state. 
- if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to deserialize OperationMetrics from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6a47b5022..e7493024c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -21,12 +21,12 @@ import lombok.NoArgsConstructor; /** - * Append-only record of a completed Spark maintenance job. + * Append-only record of a completed maintenance operation. * - *

Written by the Spark app after each table's operation finishes. The {@code id} is the same - * UUID as the originating {@code table_operations.id}, tying each history entry directly back to - * the specific operation cycle that produced it. Multiple runs of the same operation on the same - * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating {@code table_operations.id}, tying each history entry back to the operation cycle + * that produced it. Multiple runs of the same operation on the same table produce multiple rows + * (each cycle gets a new UUID from the Analyzer). */ @Entity @Table( @@ -63,7 +63,7 @@ public class TableOperationsHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; - /** When the Spark job was submitted / ran, as reported by the job itself. */ + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "submitted_at", nullable = false) private Instant submittedAt; @@ -80,12 +80,4 @@ public class TableOperationsHistoryRow { @Convert(converter = JobResultConverter.class) @Column(name = "result") private JobResult result; - - /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ - @Column(name = "orphan_files_deleted") - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - @Column(name = "orphan_bytes_deleted") - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 9d835aa20..e5493b510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,9 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; @@ -90,10 +87,9 @@ public class TableOperationsRow { private Long version; /** - * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file - * counts as of the moment the Analyzer ran. + * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer + * reads stats directly from {@code table_stats} instead of duplicating them here. 
*/ - @Convert(converter = OperationMetricsConverter.class) @Column(name = "metrics") - private OperationMetrics metrics; + private String metrics; } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 53062c5ad..098380e7f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,7 +47,5 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - orphan_files_deleted INT, - orphan_bytes_deleted BIGINT, PRIMARY KEY (id) ); From d419eb31f0449b5893739391047cf1af013cc6e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:57:51 -0700 Subject: [PATCH 03/55] feat(optimizer): add repositories and repository tests Spring Data JPA repositories for all four optimizer tables with filtered query support. Includes tests exercising save/find, filtered queries, upsert semantics, and append-only history. 
Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 60 ++++++ .../repository/TableOperationsRepository.java | 33 +++ .../TableStatsHistoryRepository.java | 41 ++++ .../repository/TableStatsRepository.java | 25 +++ .../OptimizerServiceContextTest.java | 19 ++ .../TableOperationsHistoryRepositoryTest.java | 189 ++++++++++++++++++ .../TableOperationsRepositoryTest.java | 135 +++++++++++++ .../TableStatsHistoryRepositoryTest.java | 127 ++++++++++++ .../repository/TableStatsRepositoryTest.java | 141 +++++++++++++ 9 files changed, 770 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java new file mode 
100644 index 000000000..2ba5bdf7a --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -0,0 +1,60 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * Append-only repository for {@link TableOperationsHistoryRow}; callers assign the PK {@code id}. + */ +@Repository +public interface TableOperationsHistoryRepository + extends JpaRepository { + + /** + * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. + * + * @param tableUuid the stable table UUID + * @param limit maximum number of rows to return + * @return history rows ordered by {@code submitted_at} descending + */ + @Query( + value = + "SELECT * FROM table_operations_history " + + "WHERE table_uuid = :tableUuid " + + "ORDER BY submitted_at DESC LIMIT :limit", + nativeQuery = true) + List find( + @Param("tableUuid") String tableUuid, @Param("limit") int limit); + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter.
+ */ + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "AND (:until IS NULL OR r.submittedAt <= :until) " + + "ORDER BY r.submittedAt DESC") + List findFiltered( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + @Param("operationType") OperationType operationType, + @Param("status") OperationHistoryStatus status, + @Param("since") Instant since, + @Param("until") Instant until, + Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..69476991f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}.
+ */ +@Repository +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. With all filters {@code null} every row is returned, in no set order. + */ + @Query( + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..c6ec3befd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first.
+ * + * @param tableUuid the stable table UUID + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.recordedAt DESC") + List findByTableUuid( + @Param("tableUuid") String tableUuid, Pageable pageable); + + /** + * Return history rows for a table recorded at or after {@code since}, newest first. + * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND r.recordedAt >= :since " + + "ORDER BY r.recordedAt DESC") + List findByTableUuidSince( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6c071cf5b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. With all filters {@code null} every row is returned, in no set order.
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java new file mode 100644 index 000000000..abb89ec42 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -0,0 +1,19 @@ +package com.linkedin.openhouse.optimizer; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; + +/** + * Validates that the Spring application context loads successfully against the H2 schema. This test + * exercises schema-SQL-init, JPA entity scanning, and repository wiring. + */ +@SpringBootTest +@ActiveProfiles("test") +class OptimizerServiceContextTest { + + @Test + void contextLoads() { + // Context load is the assertion — no additional assertions needed.
+ } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java new file mode 100644 index 000000000..9bde34334 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -0,0 +1,189 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsHistoryRepositoryTest { + + @Autowired TableOperationsHistoryRepository repository; + + @Test + void appendAndFindByTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t1) + .status(OperationHistoryStatus.SUCCESS) + .jobId("job-001") + .build()); + + repository.save( + 
TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t2) + .status(OperationHistoryStatus.FAILED) + .jobId("job-002") + .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .build()); + + List rows = repository.find(tableUuid, 10); + + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); + assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + } + + @Test + void appendIsNonDestructive_multipleRunsRetained() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 3; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 10); + assertThat(rows).hasSize(3); + } + + @Test + void find_respectsLimit() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 5; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl3") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 3); + assertThat(rows).hasSize(3); + } + + @Test + void findFiltered_noParams_returnsAll() { + Instant now = Instant.now(); + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(uuid1) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid2) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(1)) + .status(OperationHistoryStatus.FAILED) + .build()); + + List rows = + repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + } + + @Test + void findFiltered_byStatusAndTimeWindow() { + Instant old = Instant.parse("2024-01-01T00:00:00Z"); + Instant recent = Instant.parse("2024-06-01T00:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(old) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(recent) + .status(OperationHistoryStatus.FAILED) + .build()); + + // Filter by status + List failed = + repository.findFiltered( + null, + null, + null, + null, + OperationHistoryStatus.FAILED, + null, + null, + PageRequest.of(0, 100)); + assertThat(failed).hasSize(1); + assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + + // Filter by time window + Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); + List afterCutoff = + repository.findFiltered(null, null, null, null, null, cutoff, null, 
PageRequest.of(0, 100)); + assertThat(afterCutoff).hasSize(1); + assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java new file mode 100644 index 000000000..d7b8ee0b8 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -0,0 +1,135 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsRepositoryTest { + + @Autowired TableOperationsRepository repository; + + @Test + void saveAndFindById() { + String id = UUID.randomUUID().toString(); + + TableOperationsRow row = + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + + repository.save(row); + + Optional found = repository.findById(id); + assertThat(found).isPresent(); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + } + + @Test + 
void findFiltered_noParams_returnsAll() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List rows = repository.findFiltered(null, null, null, null, null); + assertThat(rows).hasSize(2); + } + + @Test + void findFiltered_byStatus() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List pending = + repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + assertThat(pending).hasSize(1); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + + List scheduled = + repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + assertThat(scheduled).hasSize(1); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void findFiltered_byDatabaseAndTable() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java new file mode 100644 index 000000000..fb86762dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -0,0 +1,127 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class 
TableStatsHistoryRepositoryTest { + + @Autowired TableStatsHistoryRepository repository; + + @Test + void saveAndFindByTableUuid() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + + assertThat(rows).hasSize(3); + // newest first + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + } + + @Test + void findByTableUuid_respectsLimit() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + for (int i = 0; i < 5; i++) { + repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); + } + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + + assertThat(rows).hasSize(3); + } + + @Test + void findByTableUuidSince_filtersOlderRows() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + Instant cutoff = now.minus(90, ChronoUnit.MINUTES); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = + repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + + // only the 2 rows within the last 90 minutes + assertThat(rows).hasSize(2); + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + } + + @Test + void findByTableUuid_isolatesByTableUuid() { + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = 
UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); + repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); + + assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + } + + @Test + void autoIncrementId() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + + assertThat(row1.getId()).isNotNull(); + assertThat(row2.getId()).isNotNull(); + assertThat(row2.getId()).isGreaterThan(row1.getId()); + } + + private static TableStatsHistoryRow buildRow( + String tableUuid, + String databaseId, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return TableStatsHistoryRow.builder() + .tableUuid(tableUuid) + .databaseId(databaseId) + .tableName(tableName) + .stats( + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder() + .clusterId("cl1") + .tableSizeBytes(1024L) + .build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .build()) + .build()) + .recordedAt(recordedAt) + .build(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java new file mode 100644 index 000000000..5efb49148 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import 
com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableStatsRepositoryTest { + + @Autowired TableStatsRepository repository; + + @Test + void saveAndFindById() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) + .build(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .updatedAt(Instant.now()) + .build()); + + Optional found = repository.findById(tableUuid); + assertThat(found).isPresent(); + assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + } + + @Test + void upsert_overwritesPreviousStats() { + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + repository.save( + TableStatsRow.builder() 
+ .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findAll()).hasSize(1); + assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + .isEqualTo(200L); + } + + @Test + void findFiltered_noParams_returnsAll() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, null)).hasSize(2); + } + + @Test + void findFiltered_byDatabase() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered("db1", null, null)).hasSize(1); + assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + } +} From 7ff3b4360877580f395650223c19542849a5e1f7 Mon Sep 17 00:00:00 2001 
From: mkuchenbecker Date: Mon, 6 Apr 2026 11:35:45 -0700 Subject: [PATCH 04/55] =?UTF-8?q?fix:=20consolidate=20repo=20methods=20?= =?UTF-8?q?=E2=80=94=20single=20find=20with=20optional=20filters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review comments: rename findFiltered → find across all repos, remove redundant findByTableUuid/findByTableUuidSince from history repos, add explicit assertion to context test. Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 18 +-------------- .../repository/TableOperationsRepository.java | 2 +- .../TableStatsHistoryRepository.java | 22 +++++-------------- .../repository/TableStatsRepository.java | 2 +- .../OptimizerServiceContextTest.java | 8 ++++++- .../TableOperationsHistoryRepositoryTest.java | 19 +++++++++------- .../TableOperationsRepositoryTest.java | 18 +++++++-------- .../TableStatsHistoryRepositoryTest.java | 19 ++++++++-------- .../repository/TableStatsRepositoryTest.java | 10 ++++----- 9 files changed, 49 insertions(+), 69 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 2ba5bdf7a..71ab1cde4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,22 +18,6 @@ public interface TableOperationsHistoryRepository extends JpaRepository { - /** - * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. 
- * - * @param tableUuid the stable table UUID - * @param limit maximum number of rows to return - * @return history rows ordered by {@code submitted_at} descending - */ - @Query( - value = - "SELECT * FROM table_operations_history " - + "WHERE table_uuid = :tableUuid " - + "ORDER BY submitted_at DESC LIMIT :limit", - nativeQuery = true) - List find( - @Param("tableUuid") String tableUuid, @Param("limit") int limit); - /** * Return history rows matching the given filters, ordered by {@code submittedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. @@ -48,7 +32,7 @@ List find( + "AND (:since IS NULL OR r.submittedAt >= :since) " + "AND (:until IS NULL OR r.submittedAt <= :until) " + "ORDER BY r.submittedAt DESC") - List findFiltered( + List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 69476991f..891322134 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -24,7 +24,7 @@ public interface TableOperationsRepository extends JpaRepository findFiltered( + List find( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("databaseName") String databaseName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index c6ec3befd..767d60c22 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -12,30 +12,18 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. * * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip * @param pageable use {@code PageRequest.of(0, limit)} to cap results */ @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List findByTableUuid( - @Param("tableUuid") String tableUuid, Pageable pageable); - - /** - * Return history rows for a table recorded at or after {@code since}, newest first. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND r.recordedAt >= :since " - + "ORDER BY r.recordedAt DESC") - List findByTableUuidSince( + List find( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6c071cf5b..ecae70feb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -18,7 +18,7 @@ public interface TableStatsRepository extends JpaRepository findFiltered( + List find( @Param("databaseId") String databaseId, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java index abb89ec42..fa373c57d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -1,7 +1,11 @@ package com.linkedin.openhouse.optimizer; +import static org.assertj.core.api.Assertions.assertThat; + import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.ApplicationContext; import 
org.springframework.test.context.ActiveProfiles; /** @@ -12,8 +16,10 @@ @ActiveProfiles("test") class OptimizerServiceContextTest { + @Autowired ApplicationContext context; + @Test void contextLoads() { - // Context load is the assertion — no additional assertions needed. + assertThat(context).isNotNull(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 9bde34334..1a35a8fda 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -54,7 +54,8 @@ void appendAndFindByTableUuid() { .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) .build()); - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); // Newest first @@ -79,7 +80,8 @@ void appendIsNonDestructive_multipleRunsRetained() { .build()); } - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(3); } @@ -100,12 +102,13 @@ void find_respectsLimit() { .build()); } - List rows = repository.find(tableUuid, 3); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { Instant now = Instant.now(); String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); @@ -132,14 +135,14 @@ void findFiltered_noParams_returnsAll() { .build()); List rows = - 
repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); } @Test - void findFiltered_byStatusAndTimeWindow() { + void find_byStatusAndTimeWindow() { Instant old = Instant.parse("2024-01-01T00:00:00Z"); Instant recent = Instant.parse("2024-06-01T00:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -167,7 +170,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by status List failed = - repository.findFiltered( + repository.find( null, null, null, @@ -182,7 +185,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = - repository.findFiltered(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index d7b8ee0b8..b1342b12d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -45,7 +45,7 @@ void saveAndFindById() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -67,12 +67,12 @@ void findFiltered_noParams_returnsAll() { .createdAt(Instant.now()) .build()); - List rows = 
repository.findFiltered(null, null, null, null, null); + List rows = repository.find(null, null, null, null, null); assertThat(rows).hasSize(2); } @Test - void findFiltered_byStatus() { + void find_byStatus() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -95,18 +95,18 @@ void findFiltered_byStatus() { .build()); List pending = - repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findFiltered_byDatabaseAndTable() { + void find_byDatabaseAndTable() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -128,8 +128,8 @@ void findFiltered_byDatabaseAndTable() { .createdAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index fb86762dc..a76c7155d 100644 --- 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -23,7 +23,7 @@ class TableStatsHistoryRepositoryTest { @Autowired TableStatsHistoryRepository repository; @Test - void saveAndFindByTableUuid() { + void saveAndFind() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -31,7 +31,7 @@ void saveAndFindByTableUuid() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -40,7 +40,7 @@ void saveAndFindByTableUuid() { } @Test - void findByTableUuid_respectsLimit() { + void find_respectsLimit() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -48,13 +48,13 @@ void findByTableUuid_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findByTableUuidSince_filtersOlderRows() { + void find_withSince_filtersOlderRows() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); Instant cutoff = now.minus(90, ChronoUnit.MINUTES); @@ -63,8 +63,7 @@ void findByTableUuidSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = - repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = 
repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -72,7 +71,7 @@ void findByTableUuidSince_filtersOlderRows() { } @Test - void findByTableUuid_isolatesByTableUuid() { + void find_isolatesByTableUuid() { String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -80,8 +79,8 @@ void findByTableUuid_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5efb49148..a8ac1cbbb 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -83,7 +83,7 @@ void upsert_overwritesPreviousStats() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) @@ -107,11 +107,11 @@ void findFiltered_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, null)).hasSize(2); + assertThat(repository.find(null, null, null)).hasSize(2); } @Test - void findFiltered_byDatabase() { + void find_byDatabase() { repository.save( TableStatsRow.builder() 
.tableUuid(UUID.randomUUID().toString()) @@ -135,7 +135,7 @@ void findFiltered_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered("db1", null, null)).hasSize(1); - assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null)).hasSize(1); + assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); } } From ac1da013711ca3ac680bb24e48f3859813f099a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:09:53 -0700 Subject: [PATCH 05/55] feat(optimizer): add apps/optimizer shared module with find-only repos Shared JPA entities and repositories for optimizer apps (analyzer, scheduler). All repos expose a single find method with optional filters. Co-Authored-By: Claude Opus 4.6 --- apps/optimizer/build.gradle | 13 +++++ .../entity/TableOperationHistoryRow.java | 37 +++++++++++++ .../optimizer/entity/TableOperationRow.java | 55 +++++++++++++++++++ .../optimizer/entity/TableStatsRow.java | 53 ++++++++++++++++++ .../openhouse/optimizer/model/TableStats.java | 45 +++++++++++++++ .../TableOperationHistoryRepository.java | 32 +++++++++++ .../repository/TableOperationsRepository.java | 29 ++++++++++ .../repository/TableStatsRepository.java | 25 +++++++++ settings.gradle | 1 + 9 files changed, 290 insertions(+) create mode 100644 apps/optimizer/build.gradle create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java 
create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the 
optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..f2ea9e3c8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("operationType") String operationType, + @Param("tableUuid") String tableUuid, + @Param("status") String status, + @Param("since") Instant since, + Pageable pageable); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..27424dfdc --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName)") + List find( + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6effe19c2 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/settings.gradle b/settings.gradle index cad06785e..0d64dad53 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,6 +50,7 @@ include ':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' +include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 02a5ab31c62a0847e665f674b1fb3e8684bb3433 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:19:37 -0700 Subject: [PATCH 06/55] fix: remove orphan fields from CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields never belonged in the data model — remove them at the source rather than adding then deleting in a later PR. Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/CompleteOperationRequest.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index c26893197..35f7ba782 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -22,10 +22,4 @@ public class CompleteOperationRequest { /** Error details on failure; {@code null} on success. 
*/ private JobResult result; - - /** Number of orphan files deleted; set by OFD Spark app on success. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ - private Long orphanBytesDeleted; } From f82d1b3ef3e0b1197487a68b851fa394ef9b9c7a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:14:18 -0700 Subject: [PATCH 07/55] fix(optimizer): address PR #527 review feedback - Widen-to-tighten: VARCHAR(255) -> VARCHAR(128) for database_name and table_name across all entities and the schema, aligning with prod conventions (can always be widened later, not tightened). - Rename databaseId -> databaseName in TableStatsRow, TableStatsHistoryRow, TableStatsDto, TableStatsHistoryDto, and UpsertTableStatsRequest for consistency with the operations entities and DTOs. - Drop the unused metrics field from TableOperationsRow, TableOperationsDto, and the schema. Add a TODO note in the schema that per-operation metric columns will be added as operations are onboarded. - Rename submittedAt -> completedAt in TableOperationsHistoryRow, TableOperationsHistoryDto, and the schema (column submitted_at -> completed_at, index idx_submitted_at -> idx_completed_at). The history row is written when the complete endpoint is called, so the timestamp captures completion; submission time is already on table_operations.scheduled_at. - Change TableStatsHistoryRow.id from BIGINT auto-increment to VARCHAR(36) UUID, set by the caller, matching the other id-bearing entities. - Add @JsonIgnoreProperties(ignoreUnknown = true) to CommitDelta for consistency with TableStats and SnapshotMetrics. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/model/TableOperationsDto.java | 3 --- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 2 +- .../api/model/TableStatsHistoryDto.java | 6 ++--- .../api/model/UpsertTableStatsRequest.java | 4 ++-- .../entity/TableOperationsHistoryRow.java | 10 ++++----- .../optimizer/entity/TableOperationsRow.java | 11 ++-------- .../entity/TableStatsHistoryRow.java | 13 +++++------ .../optimizer/entity/TableStatsRow.java | 6 ++--- .../main/resources/db/optimizer-schema.sql | 22 +++++++++---------- 11 files changed, 34 insertions(+), 46 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 9c33d8907..d41bd6906 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -39,7 +39,4 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - - /** Reserved for future per-operation metadata; currently unused. 
*/ - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index efc9bebbb..2a901ad2b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -24,7 +24,7 @@ public class TableOperationsHistoryDto { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ - private Instant submittedAt; + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 51aa8a712..64c99061a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -40,6 +40,7 @@ public static class SnapshotMetrics { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index a668af434..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -18,7 +18,7 @@ public class 
TableStatsDto { private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 0604e07de..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,14 +13,14 @@ @AllArgsConstructor public class TableStatsHistoryDto { - /** Auto-increment primary key. */ - private Long id; + /** UUID primary key set by the caller. */ + private String id; /** Stable Iceberg table UUID. */ private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 721c3deaf..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -9,7 +9,7 @@ /** * Request body for {@code PUT /v1/table-stats/{tableUuid}}. * - *

<p>{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + * <p>

{@code tableUuid} comes from the path variable. {@code databaseName} and {@code tableName} are * denormalized display columns carried in the body. */ @Data @@ -19,7 +19,7 @@ public class UpsertTableStatsRequest { /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e7493024c..6ac5db173 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -34,7 +34,7 @@ indexes = { @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), @Index(name = "idx_job_id", columnList = "job_id") }) @@ -53,10 +53,10 @@ public class TableOperationsHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -64,8 +64,8 @@ public class TableOperationsHistoryRow { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. 
*/ - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ @Enumerated(EnumType.STRING) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index e5493b510..43778495a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -53,10 +53,10 @@ public class TableOperationsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -85,11 +85,4 @@ public class TableOperationsRow { */ @Column(name = "version") private Long version; - - /** - * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer - * reads stats directly from {@code table_stats} instead of duplicating them here. 
- */ - @Column(name = "metrics") - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 85d97a5eb..b0d92fc81 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -5,8 +5,6 @@ import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -42,17 +40,16 @@ public class TableStatsHistoryRow { @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "id", nullable = false) - private Long id; + @Column(name = "id", nullable = false, length = 36) + private String id; @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 71d6a9421..f682a3485 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -37,10 +37,10 @@ public class TableStatsRow { @Column(name = 
"table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 098380e7f..49641efe2 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -3,22 +3,22 @@ CREATE TABLE IF NOT EXISTS table_operations ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, status VARCHAR(20) NOT NULL, created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), version BIGINT, - metrics TEXT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS table_stats ( ); CREATE TABLE IF NOT EXISTS table_stats_history ( - id BIGINT NOT NULL AUTO_INCREMENT, + id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), @@ -40,10 +40,10 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( CREATE TABLE IF NOT EXISTS table_operations_history ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, - submitted_at TIMESTAMP(6) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, From a109f0231d2edc546b4a1f630ad4e986c14ade02 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:16:45 -0700 Subject: [PATCH 08/55] fix(optimizer): propagate optimizer-0 renames into repos and tests - Repositories: update JPQL and parameter names to match the renamed entity fields (databaseName, completedAt). Change TableOperationsHistoryRepository and TableStatsHistoryRepository ID type parameter from Long to String to match the entity PK (UUID set by the caller, not auto-generated). - Tests: update builders and getters to use the renamed fields (databaseName, completedAt). Replace the autoIncrementId test with callerSetIdIsPreserved which verifies the caller-set UUID round-trips through save/findById. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationsHistoryRepository.java | 13 +++--- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 4 +- .../TableOperationsHistoryRepositoryTest.java | 20 +++++----- .../TableStatsHistoryRepositoryTest.java | 40 +++++++++++++++---- .../repository/TableStatsRepositoryTest.java | 18 ++++----- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 71ab1cde4..65d62818c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -12,14 +12,15 @@ import org.springframework.stereotype.Repository; /** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is auto-increment {@code id}. + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller + * (same UUID as the originating {@code table_operations.id}). */ @Repository public interface TableOperationsHistoryRepository - extends JpaRepository { + extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. 
*/ @Query( @@ -29,9 +30,9 @@ public interface TableOperationsHistoryRepository + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "AND (:until IS NULL OR r.submittedAt <= :until) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "AND (:until IS NULL OR r.completedAt <= :until) " + + "ORDER BY r.completedAt DESC") List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 767d60c22..aaa1b0050 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -9,7 +9,7 @@ import org.springframework.data.repository.query.Param; /** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { +public interface TableStatsHistoryRepository extends JpaRepository { /** * Return history rows for a table, newest first. 
Pass {@code null} for {@code since} to skip the diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index ecae70feb..9bcaab41b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 1a35a8fda..b9735a617 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -36,7 +36,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t1) + .completedAt(t1) .status(OperationHistoryStatus.SUCCESS) .jobId("job-001") .build()); @@ -48,7 +48,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t2) + .completedAt(t2) .status(OperationHistoryStatus.FAILED) .jobId("job-002") .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) @@ -75,7 +75,7 @@ void appendIsNonDestructive_multipleRunsRetained() { .databaseName("db1") .tableName("tbl2") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -97,7 +97,7 @@ void find_respectsLimit() { .databaseName("db1") .tableName("tbl3") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -120,7 +120,7 @@ void find_noParams_returnsAll() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now) + .completedAt(now) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -130,7 +130,7 @@ void find_noParams_returnsAll() { .databaseName("db2") .tableName("tbl2") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(1)) + .completedAt(now.plusSeconds(1)) .status(OperationHistoryStatus.FAILED) .build()); @@ -154,7 +154,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(old) + .completedAt(old) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -164,7 +164,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(recent) + .completedAt(recent) .status(OperationHistoryStatus.FAILED) .build()); @@ -180,13 +180,13 @@ void find_byStatusAndTimeWindow() { null, PageRequest.of(0, 100)); assertThat(failed).hasSize(1); - assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + 
assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index a76c7155d..f3e72b52e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -84,28 +84,52 @@ void find_isolatesByTableUuid() { } @Test - void autoIncrementId() { + void callerSetIdIsPreserved() { String tableUuid = UUID.randomUUID().toString(); + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); Instant now = Instant.now(); - TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); - TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + TableStatsHistoryRow row1 = + repository.save(buildRow(id1, tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = + repository.save(buildRow(id2, tableUuid, "db1", "tbl1", 2L, 0L, now)); - assertThat(row1.getId()).isNotNull(); - assertThat(row2.getId()).isNotNull(); - assertThat(row2.getId()).isGreaterThan(row1.getId()); + assertThat(row1.getId()).isEqualTo(id1); + assertThat(row2.getId()).isEqualTo(id2); + assertThat(repository.findById(id1)).isPresent(); + assertThat(repository.findById(id2)).isPresent(); } private static TableStatsHistoryRow buildRow( String tableUuid, - String databaseId, + String databaseName, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return buildRow( + UUID.randomUUID().toString(), + tableUuid, + databaseName, + tableName, + numFilesAdded, + numFilesDeleted, + recordedAt); + } + + private static TableStatsHistoryRow 
buildRow( + String id, + String tableUuid, + String databaseName, String tableName, long numFilesAdded, long numFilesDeleted, Instant recordedAt) { return TableStatsHistoryRow.builder() + .id(id) .tableUuid(tableUuid) - .databaseId(databaseId) + .databaseName(databaseName) .tableName(tableName) .stats( TableStats.builder() diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index a8ac1cbbb..b62371f53 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -34,7 +34,7 @@ void saveAndFindById() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(stats) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) @@ -43,7 +43,7 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); - assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); @@ -56,7 +56,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -68,7 +68,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -87,7 +87,7 @@ void find_noParams_returnsAll() { repository.save( 
TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -98,7 +98,7 @@ void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -115,7 +115,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -126,7 +126,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -136,6 +136,6 @@ void find_byDatabase() { .build()); assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); } } From 027fccd61c362c1d9b3e2902583579b34d1907f7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 11:01:56 -0700 Subject: [PATCH 09/55] fix(optimizer): add databaseName + tableName to apps/optimizer history row Address PR #530 review feedback: the lightweight read-side TableOperationHistoryRow in the apps/optimizer shared module did not surface the denormalized database_name and table_name columns, even though the underlying schema carries them. Add them so analyst-style queries from the analyzer/scheduler side can read operation history without joining back to table_operations. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e638e2e1..4e3ace953 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -26,6 +26,12 @@ public class TableOperationHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + @Column(name = "operation_type", nullable = false, length = 50) private String operationType; From 79753f1da1ae63f84de9b127d1f7cac301a6666b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:03:10 -0700 Subject: [PATCH 10/55] fix(optimizer): index table_operations_history on (database_name, table_name) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a composite secondary index on (database_name, table_name) to table_operations_history at the schema and entity layers. This backs a new name-based history-lookup endpoint added on optimizer-2; without the index, the query degrades to a full scan on a table that grows with every operation completion. The other three optimizer tables get no new indexes — no new query patterns on them this round. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/entity/TableOperationsHistoryRow.java | 3 ++- services/optimizer/src/main/resources/db/optimizer-schema.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6ac5db173..3b6ced892 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -36,7 +36,8 @@ @Index(name = "idx_op_type_hist", columnList = "operation_type"), @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id") + @Index(name = "idx_job_id", columnList = "job_id"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") }) @Getter @EqualsAndHashCode diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 49641efe2..4c2d9604b 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,5 +47,6 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - PRIMARY KEY (id) + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name) ); From bf04488d2ee0f14c0c41095b513c8551333c151d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:10:50 -0700 Subject: [PATCH 11/55] fix(optimizer): align apps/optimizer entities with services schema The apps/optimizer shared module was created in this PR with field names and column lengths that did not 
match the schema established in optimizer-0: - TableStatsRow.databaseId -> databaseName - TableOperationHistoryRow.submittedAt -> completedAt - database_name / table_name VARCHAR(255) -> VARCHAR(128) Repos updated to match (TableStatsRepository param, TableOperationHistoryRepository ORDER BY column). No services/optimizer or schema SQL change needed - those already used the correct names. This change was previously folded into a later commit on optimizer-3; moving it down to the PR that owns these files. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 4 ++-- .../openhouse/optimizer/entity/TableOperationRow.java | 4 ++-- .../linkedin/openhouse/optimizer/entity/TableStatsRow.java | 6 +++--- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/repository/TableStatsRepository.java | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e3ace953..d15eb6785 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -35,8 +35,8 @@ public class TableOperationHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private String operationType; - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; @Column(name = "status", nullable = false, length = 20) private String status; diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java index 
fc0104604..33a83bd3f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -28,10 +28,10 @@ public class TableOperationRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Column(name = "operation_type", nullable = false, length = 50) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 5cdf16a97..bc647d86e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -34,10 +34,10 @@ public class TableStatsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index f2ea9e3c8..fd9edd1f4 100644 --- 
a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -13,7 +13,7 @@ public interface TableOperationHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( @@ -21,8 +21,8 @@ public interface TableOperationHistoryRepository + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "ORDER BY r.completedAt DESC") List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6effe19c2..50f515d07 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } From 62f426a0a236f074c0db4c478b10e6e7b7949318 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:20:11 -0700 Subject: [PATCH 12/55] feat(optimizer): add 
findLatestPerTable to history repo The Analyzer evaluates cadence using only the most-recent history row per (table_uuid, operation_type); pulling the full history scan per analyzer pass is wasted I/O. Add a dedicated query that returns at most one row per (table_uuid, operation_type), restricted to a single operation type. The query uses a correlated MAX subquery for portability across MySQL and H2. For large history volume, a (operation_type, table_uuid, completed_at) index on the schema would make the subquery index-only; TODO noted in javadoc. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationHistoryRepository.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index fd9edd1f4..09930ab08 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -29,4 +29,24 @@ List find( @Param("status") String status, @Param("since") Instant since, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the Analyzer to evaluate cadence without materializing every + * historical row. + * + *
<p>
The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code + * table_operations_history} table this benefits from an index on {@code (operation_type, + * table_uuid, completed_at)} — TODO add it to the schema. + * + *
<p>
Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. + */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } From 3483b25f394e44b03c5bf94a22c1d644193466ba Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 08:55:28 -0700 Subject: [PATCH 13/55] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) on table_operations_history. TableOperationHistoryRepository.findLatestPerTable uses a correlated MAX(completed_at) subquery; without this index it degenerates to O(N²) and does not complete at 1M-row history scale. With it the inner subquery becomes an index-only lookup per outer row. Update the repo method's javadoc to point at the new index by name and drop the resolved TODO. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index 09930ab08..26166271f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -35,9 +35,9 @@ List find( * single operation type. Used by the Analyzer to evaluate cadence without materializing every * historical row. * - *
<p>
The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code - * table_operations_history} table this benefits from an index on {@code (operation_type, - * table_uuid, completed_at)} — TODO add it to the schema. + *
<p>
The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. * *
<p>
Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all * tied rows; callers should dedupe in memory. diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From 02930094479750d10f25745849e9d511f5aa0aea Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:46:45 -0700 Subject: [PATCH 14/55] feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository Enables per-database iteration in the analyzer. Returns the bounded set of database_name values present in table_stats; the analyzer uses it to drive the outer loop when no specific databaseName filter is supplied. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/repository/TableStatsRepository.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 50f515d07..4215237bc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). + */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } From eba1392e44d9170a3f0a484a7d631c1683cccf91 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:01:41 -0700 Subject: [PATCH 15/55] feat(optimizer): promote internal model types to shared apps/optimizer Move Table, TableOperation, OperationType, OperationStatus, HistoryStatus from the analyzer-internal package into the shared apps/optimizer module. The scheduler will consume the same domain types as the analyzer. Per-layer types still hold (wire-API, internal model, DB each define their own representation); this just consolidates the internal layer so multiple internal consumers (analyzer, scheduler) share one set of classes. TableOperation gains a nullable, non-persisted fileCount field. Consumers that need it (OFD bin-packing) populate it at read time from table_stats; the DB row does not carry it. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *
<p>
Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *
<p>
Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d1390ee79 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *
<p>
{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationRow toRow() { + return TableOperationRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From e57659391cc238cc4609682af943843502d8b9b8 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:51:44 -0700 Subject: [PATCH 16/55] refactor(optimizer): rename apps/optimizer entities + repos to plural; add TableStatsHistory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns apps/optimizer with the SQL table names (table_operations, table_operations_history) and the existing services/optimizer convention: - TableOperationRow → TableOperationsRow - TableOperationHistoryRow → TableOperationsHistoryRow - TableOperationHistoryRepository → TableOperationsHistoryRepository Adds the missing TableStatsHistoryRow + TableStatsHistoryRepository so apps/optimizer is a complete entity set covering all four optimizer DB tables. services/optimizer will consume these in a follow-up commit on optimizer-2 (the services-side duplicates will be deleted). 
Adds an explanatory javadoc on TableOperationsRow.version documenting the application-level optimistic-concurrency-control role used by the scheduler's CAS transitions (resolves PR #530 thread 3231557313). Co-Authored-By: Claude Opus 4.7 (1M context) --- ...ow.java => TableOperationsHistoryRow.java} | 2 +- ...rationRow.java => TableOperationsRow.java} | 10 ++- .../entity/TableStatsHistoryRow.java | 61 +++++++++++++++++++ .../optimizer/model/TableOperation.java | 10 +-- ... => TableOperationsHistoryRepository.java} | 16 ++--- .../repository/TableOperationsRepository.java | 8 +-- .../TableStatsHistoryRepository.java | 29 +++++++++ 7 files changed, 116 insertions(+), 20 deletions(-) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationHistoryRow.java => TableOperationsHistoryRow.java} (96%) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationRow.java => TableOperationsRow.java} (71%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/{TableOperationHistoryRepository.java => TableOperationsHistoryRepository.java} (79%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java similarity index 96% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index d15eb6785..e5ff2bd01 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ 
b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationHistoryRow { +public class TableOperationsHistoryRow { @Id @Column(name = "id", nullable = false, length = 36) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java similarity index 71% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 33a83bd3f..0e23761ae 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -19,7 +19,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationRow { +public class TableOperationsRow { @Id @Column(name = "id", nullable = false, length = 36) @@ -49,7 +49,13 @@ public class TableOperationRow { @Column(name = "job_id", length = 255) private String jobId; - /** Plain version column — not managed by JPA optimistic locking. */ + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
+ */ @Column(name = "version") private Long version; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..6f41881d6 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,61 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *
<p>
Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d1390ee79..d49625a57 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,7 +11,7 @@ /** * 
An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA * row via {@link #toRow()}. * @@ -56,7 +56,7 @@ public class TableOperation { private Long fileCount; /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationRow row) { + public static TableOperation from(TableOperationsRow row) { return TableOperation.builder() .id(row.getId()) .tableUuid(row.getTableUuid()) @@ -83,8 +83,8 @@ public static TableOperation pending(Table table, OperationType operationType) { } /** Convert to a JPA entity for persistence. */ - public TableOperationRow toRow() { - return TableOperationRow.builder() + public TableOperationsRow toRow() { + return TableOperationsRow.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java similarity index 79% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 26166271f..f8fe90b0c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import 
com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; @@ -9,21 +9,21 @@ import org.springframework.data.repository.query.Param; /** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationHistoryRepository - extends JpaRepository { +public interface TableOperationsHistoryRepository + extends JpaRepository { /** * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:since IS NULL OR r.completedAt >= :since) " + "ORDER BY r.completedAt DESC") - List find( + List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, @Param("status") String status, @@ -43,10 +43,10 @@ List find( * tied rows; callers should dedupe in memory. 
*/ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE r.operationType = :operationType " + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 27424dfdc..c7a08cabc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,26 +1,26 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; /** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { +public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} * to skip that filter. 
*/ @Query( - "SELECT r FROM TableOperationRow r " + "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + List find( @Param("operationType") String operationType, @Param("status") String status, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..aaa1b0050 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} From d90c26fcb36952e94d29e53a12049afc37be395b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:05:07 -0700 Subject: [PATCH 17/55] refactor(optimizer): move apps/optimizer module into services/optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apps/optimizer was a misplaced shared library duplicating the JPA layer. services/optimizer is the canonical optimizer module — schedulers and analyzers embed it directly as a library. This commit consolidates by moving the JPA entities, repositories, and in-memory domain model from apps/optimizer into services/optimizer, deleting the apps module, and updating the service-side wiring + tests accordingly. - git mv 13 files (entities/repos/model) from apps/optimizer to services/optimizer; preserves history. - Delete services-side pre-R7 duplicates: 4 entities, 4 repos, the duplicate api/model/TableStats DTO, the now-unneeded JobResultConverter. - Rename services-side wire-API enum OperationHistoryStatus → HistoryStatus. - Drop the apps/optimizer module entry from settings.gradle. - OptimizerMapper: add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, String↔JobResult default helpers so MapStruct can bridge the entity (String at JPA boundary) and the wire DTOs. - Update DTOs that import TableStats/HistoryStatus to the new package locations. - Rewrite repo tests against the simplified history repo shape and fix a stale find(...) 
positional-arg signature in the operations repo test. --- apps/optimizer/build.gradle | 13 -- .../entity/TableOperationsHistoryRow.java | 43 ------ .../optimizer/entity/TableOperationsRow.java | 61 -------- .../entity/TableStatsHistoryRow.java | 61 -------- .../optimizer/entity/TableStatsRow.java | 53 ------- .../TableOperationsHistoryRepository.java | 52 ------- .../repository/TableOperationsRepository.java | 29 ---- .../TableStatsHistoryRepository.java | 29 ---- .../repository/TableStatsRepository.java | 33 ----- .../optimizer/api/mapper/OptimizerMapper.java | 59 ++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 50 ------- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------ .../entity/TableOperationsHistoryRow.java | 53 ++----- .../optimizer/entity/TableOperationsRow.java | 57 ++------ .../entity/TableStatsHistoryRow.java | 4 +- .../optimizer/entity/TableStatsRow.java | 20 ++- .../optimizer/model/HistoryStatus.java | 0 .../optimizer/model/OperationStatus.java | 0 .../optimizer/model/OperationType.java | 0 .../openhouse/optimizer/model/Table.java | 0 .../optimizer/model/TableOperation.java | 0 .../openhouse/optimizer/model/TableStats.java | 0 .../TableOperationsHistoryRepository.java | 51 +++---- .../repository/TableOperationsRepository.java | 20 ++- .../repository/TableStatsRepository.java | 12 +- .../TableOperationsHistoryRepositoryTest.java | 131 +++++------------- .../TableOperationsRepositoryTest.java | 44 +++--- .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 2 +- settings.gradle | 1 - 36 files changed, 200 insertions(+), 728 deletions(-) delete mode 100644 apps/optimizer/build.gradle delete mode 100644 
apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java (100%) diff --git 
a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle deleted file mode 100644 index f14969274..000000000 --- a/apps/optimizer/build.gradle +++ /dev/null @@ -1,13 +0,0 @@ -plugins { - id 'openhouse.java-minimal-conventions' -} - -// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). -buildDir = "${rootProject.buildDir}/apps-optimizer" - -dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' - testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' - testRuntimeOnly 'com.h2database:h2' -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index e5ff2bd01..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,43 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ -@Entity -@Table(name = "table_operations_history") -@Getter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - @Column(name = "status", nullable = false, length = 20) - private String status; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 0e23761ae..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ -@Entity -@Table(name = "table_operations") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - @Column(name = "created_at") - private Instant createdAt; - - @Column(name = "scheduled_at") - private Instant scheduledAt; - - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this - * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't - * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain - * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
- */ - @Column(name = "version") - private Long version; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6f41881d6..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index bc647d86e..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java deleted file mode 100644 index f8fe90b0c..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import 
org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationsHistoryRepository - extends JpaRepository { - - /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("operationType") String operationType, - @Param("tableUuid") String tableUuid, - @Param("status") String status, - @Param("since") Instant since, - Pageable pageable); - - /** - * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a - * single operation type. Used by the Analyzer to evaluate cadence without materializing every - * historical row. - * - *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code - * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code - * table_operations_history}, the subquery becomes an index-only lookup per outer row. - * - *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all - * tied rows; callers should dedupe in memory. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE r.operationType = :operationType " - + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " - + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java deleted file mode 100644 index c7a08cabc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { - - /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. 
- */ - @Query( - "SELECT r FROM TableOperationsRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( - @Param("operationType") String operationType, - @Param("status") String status, - @Param("tableUuid") String tableUuid, - @Param("databaseName") String databaseName, - @Param("tableName") String tableName); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java deleted file mode 100644 index aaa1b0050..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { - - /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND (:since IS NULL OR r.recordedAt >= :since) " - + "ORDER BY r.recordedAt DESC") - List find( - @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java deleted file mode 100644 index 4215237bc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ -public interface TableStatsRepository extends JpaRepository { - - /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. - */ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); - - /** - * Return the distinct {@code database_name} values present in {@code table_stats}. 
Used by the - * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the - * number of databases (small even at million-table scale). - */ - @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") - List findDistinctDatabaseNames(); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..ddf33a30f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,15 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

<p>Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *
<p>
Type-conversion helpers bridge the entity's raw String/JSON shape (the entities use Strings at + * the JPA boundary for portability) and the wire DTO's typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +40,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java deleted file mode 100644 index 64c99061a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Combined stats payload stored as a single JSON blob per table. */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot — used for bin-packing. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..09eb7fc21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,52 +1,24 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import 
lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - */ +/** Lightweight JPA entity for reading {@code table_operations_history} rows. */ @Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) +@Table(name = "table_operations_history") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsHistoryRow { - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; @@ -60,25 +32,18 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; - /** {@code SUCCESS} or {@code FAILED}. 
*/ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; - /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..0e23761ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,55 +1,30 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - */ +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. */ @Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) +@Table(name = "table_operations") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsRow { - /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. 
*/ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; @@ -59,29 +34,27 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) + @Column(name = "created_at") private Instant createdAt; - /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ @Column(name = "scheduled_at") private Instant scheduledAt; - /** Job ID returned by the Jobs Service after successful submission. */ @Column(name = "job_id", length = 255) private String jobId; /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
*/ @Column(name = "version") private Long version; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6f41881d6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..bc647d86e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; @@ -8,29 +8,26 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. + * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableStatsRow { @Id @@ -51,7 +48,6 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java 
similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 65d62818c..ba2ce35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,45 +1,40 @@ package 
com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller - * (same UUID as the originating {@code table_operations.id}). - */ -@Repository +/** Repository for reading {@code table_operations_history}. */ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. + * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer + * {@code getHistory} endpoint. + */ + List findByTableUuidOrderByCompletedAtDesc( + String tableUuid, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the analyzer to evaluate cadence without materializing every + * historical row. + * + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. + * + *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. */ @Query( "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "AND (:until IS NULL OR r.completedAt <= :until) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid, - @Param("operationType") OperationType operationType, - @Param("status") OperationHistoryStatus status, - @Param("since") Instant since, - @Param("until") Instant until, - Pageable pageable); + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 891322134..c7a08cabc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,33 +1,29 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import 
com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. */ -@Repository +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + + "AND (:tableName IS NULL OR r.tableName = :tableName)") List find( - @Param("operationType") OperationType operationType, - @Param("status") OperationStatus status, + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + @Param("tableName") String tableName); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 9bcaab41b..4215237bc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -6,12 +6,12 @@ import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ public interface TableStatsRepository extends JpaRepository { /** * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableStatsRow r " @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). 
+ */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index b9735a617..436d08066 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,8 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; @@ -24,7 +23,7 @@ class TableOperationsHistoryRepositoryTest { @Autowired TableOperationsHistoryRepository repository; @Test - void appendAndFindByTableUuid() { + void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -35,9 +34,9 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t1) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .jobId("job-001") .build()); @@ -47,46 +46,23 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t2) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .jobId("job-002") - .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") .build()); List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); + repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - // Newest first assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); } @Test - void appendIsNonDestructive_multipleRunsRetained() { - Instant now = Instant.now(); - String tableUuid = UUID.randomUUID().toString(); - for (int i = 0; i < 3; i++) { - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) - .build()); - } - - List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); - assertThat(rows).hasSize(3); - } - - @Test - void find_respectsLimit() { + void findByTableUuid_respectsLimit() { Instant now = Instant.now(); String tableUuid = UUID.randomUUID().toString(); for (int i = 0; i < 5; i++) { @@ -96,97 +72,62 @@ void find_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); + 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void find_noParams_returnsAll() { - Instant now = Instant.now(); - String uuid1 = UUID.randomUUID().toString(); - String uuid2 = UUID.randomUUID().toString(); + void findLatestPerTable_returnsOneRowPerTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-02-01T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + String otherUuid = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(uuid1) + .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now) - .status(OperationHistoryStatus.SUCCESS) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(uuid2) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(1)) - .status(OperationHistoryStatus.FAILED) - .build()); - - List rows = - repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); - assertThat(rows).hasSize(2); - // Newest first - assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); - } - - @Test - void find_byStatusAndTimeWindow() { - Instant old = Instant.parse("2024-01-01T00:00:00Z"); - Instant recent = Instant.parse("2024-06-01T00:00:00Z"); - String tableUuid = UUID.randomUUID().toString(); - repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(old) - .status(OperationHistoryStatus.SUCCESS) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t2) + .status(HistoryStatus.FAILED.name()) .build()); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) + .tableUuid(otherUuid) .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(recent) - .status(OperationHistoryStatus.FAILED) + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - // Filter by status - List failed = - repository.find( - null, - null, - null, - null, - OperationHistoryStatus.FAILED, - null, - null, - PageRequest.of(0, 100)); - assertThat(failed).hasSize(1); - assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); + List latest = + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); - // Filter by time window - Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); - List afterCutoff = - repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); - assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); + assertThat(latest).hasSize(2); + TableOperationsHistoryRow forTarget = + latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); + assertThat(forTarget.getCompletedAt()).isEqualTo(t2); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index b1342b12d..2ca8dc61e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING.name(), null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -123,13 +123,13 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); - assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); + 
assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); + assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index f3e72b52e..475196630 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index b62371f53..240d512ef 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/settings.gradle b/settings.gradle index 0d64dad53..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,7 +50,6 @@ include 
':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' -include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 9a129a8ca7848bf1db15dbf05f847bc7c593e8ef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:00:04 -0700 Subject: [PATCH 18/55] =?UTF-8?q?refactor(optimizer):=20align=20data=20mod?= =?UTF-8?q?el=20=E2=80=94=20rename=20HistoryStatus;=20String=20at=20JPA=20?= =?UTF-8?q?boundary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward fix in response to review feedback that data-model decisions belong in this PR (optimizer-0), not in downstream stack layers. Brings the data-model end-state to where optimizer-1+ already are, so the optimizer-0..optimizer-1 diff is just repositories + wiring. - Rename api/model/OperationHistoryStatus → HistoryStatus. - Move api/model/TableStats → model/TableStats (the in-memory stats domain type is used by both entities and DTOs, so it lives in a neutral package rather than under api/model/). - Delete config/JobResultConverter. Entities now store the JobResult as a JSON String column directly; serialization happens at the wire-API boundary via OptimizerMapper helpers. - Switch the operation/status columns on TableOperationsRow and TableOperationsHistoryRow from JPA-bound enums to String. Keeps the entity layer decoupled from wire-API enum identity. - Add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, and String↔JobResult default helpers to OptimizerMapper so MapStruct can bridge entity (String) and DTO (typed) columns. - Update DTO/entity imports to follow the renamed/moved types. 
--- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------------ .../entity/TableOperationsHistoryRow.java | 25 ++++---- .../optimizer/entity/TableOperationsRow.java | 15 +++-- .../entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 2 +- .../optimizer/{api => }/model/TableStats.java | 2 +- 13 files changed, 86 insertions(+), 68 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/{api => }/model/TableStats.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..36d4b5f4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import 
com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

<p>Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + * <p>
Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and + * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's + * typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import 
lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. */ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..8303a4579 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,15 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -27,6 +20,11 @@ * originating {@code table_operations.id}, tying each history entry back to the operation cycle * that produced it. Multiple runs of the same operation on the same table produce multiple rows * (each cycle gets a new UUID from the Analyzer). + * + *

<p>{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} + * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and + * structured-result types. The wire layer is responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -60,25 +58,22 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + /** Job result JSON blob: error details on failure, both fields null on success. 
*/ + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..5d90f3d12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -26,6 +22,11 @@ * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, * operation_type)} at a time. + * + *

<p>{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound + * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is + * responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -59,13 +60,11 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; /** When the Analyzer first created this row. Set by the service on insert; never updated. */ @Column(name = "created_at", nullable = false) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..2a1414567 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 64c99061a..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; From dfb910291443bcfe4b6adfb724808dadcb0c8c5a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:07:53 -0700 Subject: [PATCH 19/55] refactor(optimizer): realign entity shapes with optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R7-1 imported the looser apps-side variant of TableStatsRow, TableStatsHistoryRow, and model/TableStats into the services-side paths, regressing the locked-down shape that optimizer-0 had. R8-1's git mv carried the regression forward. 
This commit makes optimizer-1's HEAD match optimizer-0's canonical shape so the optimizer-0..optimizer-1 diff no longer shows ghost model edits. - TableStatsRow: restore @EqualsAndHashCode, AccessLevel.PROTECTED on NoArgsConstructor + AllArgsConstructor, and toBuilder=true on @Builder. Drop @Setter (no callers — repo tests and downstream consumers use the builder). - TableStatsHistoryRow: restore the dropped "can" in the javadoc. - model/TableStats: restore @JsonIgnoreProperties(ignoreUnknown = true) on the outer class + both inner classes, and restore the CommitDelta.addedSizeBytes field that R7-1 dropped. --- .../optimizer/entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 18 +++++++++++------- .../openhouse/optimizer/model/TableStats.java | 9 +++++++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 6f41881d6..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

<p>Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..2a1414567 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -8,26 +8,29 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; +import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + * <p>
Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { @Id @@ -48,6 +51,7 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 5e0f51468..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,15 +1,17 @@ package com.linkedin.openhouse.optimizer.model; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +/** Combined stats payload stored as a single JSON blob per table. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { /** Snapshot fields — overwritten on every upsert. 
*/ @@ -23,6 +25,7 @@ public class TableStats { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { private String clusterId; private String tableVersion; @@ -32,14 +35,16 @@ public static class SnapshotMetrics { private Long numCurrentFiles; } - /** Per-commit incremental counters accumulated across all recorded commit events. */ + /** Per-commit incremental counters; accumulated across all recorded commit events. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } From 681407ef6a1a1d2dc34dee2a4ca308c5d008ca3f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:18:39 -0700 Subject: [PATCH 20/55] feat(optimizer): add internal model layer Per-layer types: wire-API enums (api/model/), DB-side String at JPA boundary, and an internal in-memory model layer that is what the analyzer and scheduler operate on. The wire and DB sides convert at their boundary; consumers of the optimizer library work in the internal types. - model/HistoryStatus, model/OperationStatus, model/OperationType: internal enums mirroring the wire-API counterparts. Decoupled so the analyzer/scheduler can evolve their state machines without churning the wire or DB shapes. - model/Table: an OpenHouse table enriched with stats + properties. Built from a TableStatsRow. - model/TableOperation: analyzer's decision-to-schedule + scheduler's unit of work. Constructed from TableOperationsRow or from a Table; converts back via toRow(). 
--- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d49625a57 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationsRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From e3fb7770613e8635bb4f68bded2945e1845d7510 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:26:51 -0700 Subject: [PATCH 21/55] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed on table_operations_history. Drives the correlated MAX(completed_at) subquery in TableOperationsHistoryRepository.findLatestPerTable (introduced in optimizer-1), turning it into an index-only lookup per (operation_type, table_uuid) instead of an O(N²) scan. Lands with the schema in optimizer-0 since the index is part of the data model definition; the query that depends on it lands with the repository in optimizer-1. 
--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From d3e17262f5ec8b0e97b54d8312da746278680a6f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:11:11 -0700 Subject: [PATCH 22/55] refactor(optimizer): enforce layer boundaries in api/ + model/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make data types in api/ and model/ self-contained — no cross-layer imports between them and no references into the DB layer. The internal model layer owns conversion to the api edge via a new mapper sub-package. api/ changes: - Add api/model/TableStats (api-layer copy of the JSON payload). - Drop cross-layer imports from TableStatsDto, TableStatsHistoryDto, UpsertTableStatsRequest; they pick up TableStats from the same package. model/ changes: - Add model/JobResult (internal copy of the result payload). - Add model/TableOperationsHistory (internal container mirroring the history-row field set in typed form). 
- Remove cross-layer factory methods Table.from(TableStatsRow), TableOperation.from(TableOperationsRow), and TableOperation.toRow(). Construction at the DB boundary moves to a future model/mapper/ ModelDbMapper that ships with the db/ rename on optimizer-1. - Add model/mapper/ApiModelMapper — converts api/ DTOs ↔ model/ types. Only place inside model/ where api/ types appear. Per-PR ownership: - api/ and model/ live on this PR. - db/ (currently entity/) and its boundary-side mapper (model/mapper/ModelDbMapper) land on optimizer-1. - The existing api/mapper/OptimizerMapper still references entity/ on this branch; it gets retired on optimizer-2 once the service routes through the new mappers. --- .../optimizer/api/model/TableStats.java | 55 ++++ .../optimizer/api/model/TableStatsDto.java | 1 - .../api/model/TableStatsHistoryDto.java | 1 - .../api/model/UpsertTableStatsRequest.java | 1 - .../openhouse/optimizer/model/JobResult.java | 25 ++ .../openhouse/optimizer/model/Table.java | 16 +- .../optimizer/model/TableOperation.java | 38 +-- .../model/TableOperationsHistory.java | 47 ++++ .../model/mapper/ApiModelMapper.java | 234 ++++++++++++++++++ 9 files changed, 369 insertions(+), 49 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..de268ffe7 --- /dev/null +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Combined stats payload exposed on the optimizer wire API. + * + *

API-layer copy of the stats payload — self-contained, evolved only when the wire contract + * changes. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 4aad1e18f..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 6d515a543..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 8bb317676..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java new file mode 100644 index 000000000..7e48dd0ef --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model result payload for a completed Spark maintenance job. + * + *

Internal-layer copy of the structured result. Both fields are {@code null} on success; + * populated on failure. Intentionally separate from the wire-API and DB representations. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index e232803dd..c8bede225 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -12,6 +11,9 @@ * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). + * + *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link + * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data @Builder @@ -26,16 +28,4 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d49625a57..1f14dddff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,9 +10,11 @@ /** * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current - * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA - * row via {@link #toRow()}. + * up and submits. + * + *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction + * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or + * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -55,20 +56,6 @@ public class TableOperation { */ private Long fileCount; - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationsRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() @@ -82,21 +69,6 @@ public static TableOperation pending(Table table, OperationType operationType) { .build(); } - /** Convert to a JPA entity for persistence. */ - public TableOperationsRow toRow() { - return TableOperationsRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - /** Return the more recently created of two operations. 
*/ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator byCreatedAt = diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java new file mode 100644 index 000000000..64e0d57b3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -0,0 +1,47 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of a completed operation history record. + * + *

Mirrors the field set of the underlying history row but in internal types only. Used by + * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistory { + + /** Same UUID as the originating live-operations row. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + /** Denormalized database name. */ + private String databaseName; + + /** Denormalized table name. */ + private String tableName; + + /** Operation type for this completed run. */ + private OperationType operationType; + + /** When the operation completed, as recorded by the complete endpoint. */ + private Instant completedAt; + + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ + private HistoryStatus status; + + /** Spark job ID for the run that produced this record. */ + private String jobId; + + /** Job result payload; both inner fields {@code null} on success. 
*/ + private JobResult result; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java new file mode 100644 index 000000000..2ae477e0d --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -0,0 +1,234 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.JobResult; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import org.springframework.stereotype.Component; + +/** + * Converts between wire-API DTOs and internal {@code model/} domain objects. + * + *

The only place inside {@code model/} where {@code api/} types are referenced — this is the + * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} + * stay free of any api-side imports. + * + *

API-layer enums + payloads are intentionally separate Java types from the internal-model + * counterparts; the two sides evolve independently. This mapper translates by name. + */ +@Component +public class ApiModelMapper { + + // --- TableOperationsDto <-> TableOperation --- + + public TableOperation toOperation(TableOperationsDto dto) { + if (dto == null) { + return null; + } + return TableOperation.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .status(toModelOperationStatus(dto.getStatus())) + .createdAt(dto.getCreatedAt()) + .scheduledAt(dto.getScheduledAt()) + .build(); + } + + public TableOperationsDto toDto(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toApiOperationType(op.getOperationType())) + .status(toApiOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } + + // --- TableOperationsHistoryDto <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { + if (dto == null) { + return null; + } + return TableOperationsHistory.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .completedAt(dto.getCompletedAt()) + .status(toModelHistoryStatus(dto.getStatus())) + .jobId(dto.getJobId()) + .result(toModelJobResult(dto.getResult())) + .build(); + } + + public TableOperationsHistoryDto toDto(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + 
.databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(toApiOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toApiHistoryStatus(history.getStatus())) + .jobId(history.getJobId()) + .result(toApiJobResult(history.getResult())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { + if (apiStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(apiStats.getSnapshot())) + .delta(toModelDelta(apiStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() + .snapshot(toApiSnapshot(modelStats.getSnapshot())) + .delta(toApiDelta(modelStats.getDelta())) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType( + com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { + return apiValue == null ? null : OperationType.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( + OperationType modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { + return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( + OperationStatus modelValue) { + return modelValue == null + ? 
null + : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); + } + + public HistoryStatus toModelHistoryStatus( + com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { + return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( + HistoryStatus modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); + } + + // --- JobResult --- + + public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { + if (apiValue == null) { + return null; + } + return JobResult.builder() + .errorMessage(apiValue.getErrorMessage()) + .errorType(apiValue.getErrorType()) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() + .errorMessage(modelValue.getErrorMessage()) + .errorType(modelValue.getErrorType()) + .build(); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(apiValue.getClusterId()) + .tableVersion(apiValue.getTableVersion()) + .tableLocation(apiValue.getTableLocation()) + .tableSizeBytes(apiValue.getTableSizeBytes()) + .numCurrentFiles(apiValue.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( + TableStats.SnapshotMetrics modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() + .clusterId(modelValue.getClusterId()) + 
.tableVersion(modelValue.getTableVersion()) + .tableLocation(modelValue.getTableLocation()) + .tableSizeBytes(modelValue.getTableSizeBytes()) + .numCurrentFiles(modelValue.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(apiValue.getNumFilesAdded()) + .numFilesDeleted(apiValue.getNumFilesDeleted()) + .addedSizeBytes(apiValue.getAddedSizeBytes()) + .deletedSizeBytes(apiValue.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( + TableStats.CommitDelta modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() + .numFilesAdded(modelValue.getNumFilesAdded()) + .numFilesDeleted(modelValue.getNumFilesDeleted()) + .addedSizeBytes(modelValue.getAddedSizeBytes()) + .deletedSizeBytes(modelValue.getDeletedSizeBytes()) + .build(); + } +} From 1d469a72fdb68133c95cd8def12027f428ab2acd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:17:49 -0700 Subject: [PATCH 23/55] refactor(optimizer): remove db-layer types from optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer (entities + api↔db mapper) belongs to optimizer-1, not optimizer-0. optimizer-0 owns only the wire-API surface and the internal model. Delete from this PR: - entity/ package (TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow, package-info). - api/mapper/OptimizerMapper — was the api↔entity bridge. With the entity files moving out of this PR and the new model/mapper/ taking over conversion duties, this mapper is no longer needed here. 
optimizer-1 will re-introduce these as db/ (renamed) with db-side per-layer types and a model/mapper/ModelDbMapper. --- .../optimizer/api/mapper/OptimizerMapper.java | 92 ------------------- .../entity/TableOperationsHistoryRow.java | 79 ---------------- .../optimizer/entity/TableOperationsRow.java | 87 ------------------ .../entity/TableStatsHistoryRow.java | 61 ------------ .../optimizer/entity/TableStatsRow.java | 57 ------------ .../optimizer/entity/package-info.java | 2 - 6 files changed, 378 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java deleted file mode 100644 index 36d4b5f4b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.mapper; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import org.mapstruct.Mapper; - -/** - * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. - * - *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. - * - *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and - * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's - * typed enums and nested objects. - */ -@Mapper(componentModel = "spring") -public interface OptimizerMapper { - - ObjectMapper JSON = new ObjectMapper(); - - /** Map a {@link TableOperationsRow} to its DTO. */ - TableOperationsDto toDto(TableOperationsRow row); - - /** Map a {@link TableOperationsHistoryRow} to its DTO. */ - TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); - - /** Map a {@link TableStatsRow} to its DTO. */ - TableStatsDto toDto(TableStatsRow row); - - /** Map a {@link TableStatsHistoryRow} to its DTO. */ - TableStatsHistoryDto toDto(TableStatsHistoryRow row); - - // --- entity String ↔ wire enum/object helpers --- - - default OperationType toOperationType(String value) { - return value == null ? null : OperationType.valueOf(value); - } - - default String fromOperationType(OperationType value) { - return value == null ? null : value.name(); - } - - default OperationStatus toOperationStatus(String value) { - return value == null ? null : OperationStatus.valueOf(value); - } - - default String fromOperationStatus(OperationStatus value) { - return value == null ? null : value.name(); - } - - default HistoryStatus toHistoryStatus(String value) { - return value == null ? null : HistoryStatus.valueOf(value); - } - - default String fromHistoryStatus(HistoryStatus value) { - return value == null ? 
null : value.name(); - } - - default JobResult toJobResult(String json) { - if (json == null) { - return null; - } - try { - return JSON.readValue(json, JobResult.class); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); - } - } - - default String fromJobResult(JobResult value) { - if (value == null) { - return null; - } - try { - return JSON.writeValueAsString(value); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 8303a4579..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - * - *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} - * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and - * structured-result types. The wire layer is responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. */ - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** Spark job ID; indexed for job → result lookups. 
*/ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result JSON blob: error details on failure, both fields null on success. */ - @Column(name = "result", columnDefinition = "TEXT") - private String result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 5d90f3d12..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - * - *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound - * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is - * responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. 
*/ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6ead5e42c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index 2a1414567..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java deleted file mode 100644 index 7c0ca1f67..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java +++ /dev/null @@ -1,2 +0,0 @@ -/** JPA entities for the optimizer service. */ -package com.linkedin.openhouse.optimizer.entity; From eee8ecae794fecdc7676e02c0fb286cd3c98e9fa Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:20:28 -0700 Subject: [PATCH 24/55] refactor(optimizer): remove DB schema + schema-init properties The DDL is part of the db/ layer's ownership (optimizer-1). Move the schema file and its schema-init properties out of optimizer-0 so this PR is purely api/ + model/. 
Delete: - src/main/resources/db/optimizer-schema.sql. - spring.sql.init.mode, spring.sql.init.schema-locations, and spring.jpa.defer-datasource-initialization from application.properties (they reference the deleted schema file). optimizer-1 re-introduces these alongside the db/ entities and repositories. --- .../src/main/resources/application.properties | 4 -- .../main/resources/db/optimizer-schema.sql | 56 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..00982d80e 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -2,10 +2,6 @@ spring.application.name=openhouse-optimizer-service server.port=8080 spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql - spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect spring.jpa.properties.hibernate.show_sql=false spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql deleted file mode 100644 index 322f3bf92..000000000 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Optimizer Service Schema --- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
-CREATE TABLE IF NOT EXISTS table_operations ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - status VARCHAR(20) NOT NULL, - created_at TIMESTAMP(6) NOT NULL, - scheduled_at TIMESTAMP(6), - job_id VARCHAR(255), - version BIGINT, - -- TODO: per-operation metric columns will be added as operations are onboarded. - PRIMARY KEY (id) -); - -CREATE TABLE IF NOT EXISTS table_stats ( - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - table_properties TEXT, - updated_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (table_uuid) -); - -CREATE TABLE IF NOT EXISTS table_stats_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - recorded_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (id), - INDEX idx_tsh_table_uuid (table_uuid), - INDEX idx_tsh_recorded_at (recorded_at) -); - -CREATE TABLE IF NOT EXISTS table_operations_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - completed_at TIMESTAMP(6) NOT NULL, - status VARCHAR(20) NOT NULL, - job_id VARCHAR(255), - result TEXT, - PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name), - -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated - -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, - -- table_uuid) instead of an O(N²) scan. 
- INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) -); From 328e5b91b4c1db0f5abf22e37a4dea787d351bef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:23:10 -0700 Subject: [PATCH 25/55] refactor(optimizer): scrub MySQL / JPA / datasource references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DB-layer dependencies belong to optimizer-1. With entities, schema, and the api/mapper deleted from this PR, the JPA + MySQL stack is unused — remove the dependency declarations and configuration that referenced them. build.gradle: - Drop spring-boot-starter-data-jpa, mysql-connector-java, the vladmihalcea hibernate-types JSON serializer, and the h2 test runtime. application.properties: - Drop spring.jpa.* and spring.datasource.* lines. Delete services/optimizer/src/test/resources/application-test.properties (H2 test datasource config — re-introduced on optimizer-1 alongside the repositories and repo tests). 
--- services/optimizer/build.gradle | 4 ---- .../src/main/resources/application.properties | 11 ----------- .../src/test/resources/application-test.properties | 12 ------------ 3 files changed, 27 deletions(-) delete mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..2de8fd5c7 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,11 +4,7 @@ plugins { } dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'mysql:mysql-connector-java:8.+' - testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 00982d80e..64c40d1f2 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -1,16 +1,5 @@ spring.application.name=openhouse-optimizer-service server.port=8080 -spring.jpa.hibernate.ddl-auto=none -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect -spring.jpa.properties.hibernate.show_sql=false -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver -spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} -spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} -spring.datasource.hikari.maximum-pool-size=20 - management.endpoints.web.exposure.include=health,prometheus 
management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties deleted file mode 100644 index 97b7841dc..000000000 --- a/services/optimizer/src/test/resources/application-test.properties +++ /dev/null @@ -1,12 +0,0 @@ -spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 -spring.datasource.driver-class-name=org.h2.Driver -spring.datasource.username=sa -spring.datasource.password= - -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From f7a5d208e106cb5c1c051bc450f14833be1bb093 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:25:41 -0700 Subject: [PATCH 26/55] refactor(optimizer): drop UpsertTableOperationsRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No external system creates table operations — operations are written by the in-process analyzer directly through the model layer. The request type has no wire consumer and no internal consumer, so it's dead code. Delete services/optimizer/.../api/model/UpsertTableOperationsRequest.java. 
--- .../model/UpsertTableOperationsRequest.java | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java deleted file mode 100644 index 21174c337..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * PUT request body for {@code /v1/table-operations/{id}}. - * - *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service creates the row on first call. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class UpsertTableOperationsRequest { - - /** Stable Iceberg table UUID identifying the target table. */ - private String tableUuid; - - /** Denormalized database name for display. */ - private String databaseName; - - /** Denormalized table name for display. */ - private String tableName; - - /** The type of maintenance operation to create. */ - private OperationType operationType; -} From 2a532b577ed51507c72e836ea4d8778967f43062 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:47:42 -0700 Subject: [PATCH 27/55] refactor(optimizer): drop JobResult from the wire and internal model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JobResult is removed from the optimizer API. CompleteOperationRequest (user-edited) now carries only operationId + status — the failure detail abstraction has been retired. The internal model and DTOs no longer carry it either, and the type itself is deleted from both api/ and model/. CompleteOperationRequest: - operationId moved from path to body (user manual edit). - jobId field removed. - result field removed. api/model/TableOperationsHistoryDto: - Drop jobId and result fields. model/TableOperationsHistory: - Drop jobId and result fields. model/mapper/ApiModelMapper: - Remove toModelJobResult / toApiJobResult helpers + JobResult import. - toHistory()/toDto() no longer touch jobId or result. Delete: - services/optimizer/.../api/model/JobResult.java - services/optimizer/.../model/JobResult.java Downstream propagation: opt-2's service signature changes (completeOperation now takes only the request body); db/HistoryStatus remains needed on opt-1 but db/JobResult no longer is. 
See memory/tasks/mkuchenb-optimizer-3-fixes.md for the full propagation list. --- .../api/model/CompleteOperationRequest.java | 15 ++++++----- .../optimizer/api/model/JobResult.java | 25 ----------------- .../api/model/TableOperationsHistoryDto.java | 6 ----- .../openhouse/optimizer/model/JobResult.java | 25 ----------------- .../model/TableOperationsHistory.java | 6 ----- .../model/mapper/ApiModelMapper.java | 27 ------------------- 6 files changed, 9 insertions(+), 95 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 4f3f6535a..30648d497 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -8,8 +8,12 @@ /** * Request body for {@code POST /v1/table-operations/{id}/complete}. * - *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code - * id} and writes a history entry with the operation's table metadata and the supplied result. + *

Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle + * operation UUID — the service looks up that one row and writes a history entry for it. + * + *

A single Spark job typically processes N tables and yields N independent (status, result) + * pairs — one per operation. Callers issue one complete request per operation; the service does not + * bulk-complete by job. */ @Data @Builder @@ -17,9 +21,8 @@ @AllArgsConstructor public class CompleteOperationRequest { - /** Outcome of the operation. */ - private HistoryStatus status; + private String operationId; - /** Error details on failure; {@code null} on success. */ - private JobResult result; + /** Terminal outcome for this single operation. */ + private HistoryStatus status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java deleted file mode 100644 index 74942243c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Result payload for a completed Spark maintenance job. - * - *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields - * are {@code null} on success; populated on failure. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index a7a9d9dc6..d9fa1f387 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -28,10 +28,4 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; - - /** Job ID from the Jobs Service. */ - private String jobId; - - /** Job result payload; both fields null on success. */ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java deleted file mode 100644 index 7e48dd0ef..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Internal-model result payload for a completed Spark maintenance job. - * - *

Internal-layer copy of the structured result. Both fields are {@code null} on success; - * populated on failure. Intentionally separate from the wire-API and DB representations. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 64e0d57b3..fe5bee5f7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -38,10 +38,4 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; - - /** Spark job ID for the run that produced this record. */ - private String jobId; - - /** Job result payload; both inner fields {@code null} on success. 
*/ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 2ae477e0d..35af7fb25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.JobResult; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; @@ -72,8 +71,6 @@ public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { .operationType(toModelOperationType(dto.getOperationType())) .completedAt(dto.getCompletedAt()) .status(toModelHistoryStatus(dto.getStatus())) - .jobId(dto.getJobId()) - .result(toModelJobResult(dto.getResult())) .build(); } @@ -89,8 +86,6 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .operationType(toApiOperationType(history.getOperationType())) .completedAt(history.getCompletedAt()) .status(toApiHistoryStatus(history.getStatus())) - .jobId(history.getJobId()) - .result(toApiJobResult(history.getResult())) .build(); } @@ -154,28 +149,6 @@ public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStat : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); } - // --- JobResult --- - - public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { - if (apiValue == null) { - return null; - } - return JobResult.builder() - 
.errorMessage(apiValue.getErrorMessage()) - .errorType(apiValue.getErrorType()) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() - .errorMessage(modelValue.getErrorMessage()) - .errorType(modelValue.getErrorType()) - .build(); - } - // --- TableStats inner classes --- private TableStats.SnapshotMetrics toModelSnapshot( From 2e3a2316295d67105802f4a4c73032396048be9d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:52:15 -0700 Subject: [PATCH 28/55] feat(optimizer): add debug echo fields to CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tableUuid, databaseName, tableName, and operationType to the complete request body. They're debug-only — the server keys lookup off operationId — but preserving them on logs and traces helps an operator diagnose a failing complete call without joining back to the operation row. --- .../api/model/CompleteOperationRequest.java | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 30648d497..0add634b5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -6,14 +6,20 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/{id}/complete}. + * Request body for {@code POST /v1/table-operations/complete}. * - *

Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle - * operation UUID — the service looks up that one row and writes a history entry for it. + *

Reports the outcome of a single completed operation. The service looks up the operation row by + * {@link #operationId} and writes a history entry for it. * - *

A single Spark job typically processes N tables and yields N independent (status, result) - * pairs — one per operation. Callers issue one complete request per operation; the service does not + *

A single Spark job typically processes N tables and yields N independent (status) outcomes — + * one per operation. Callers issue one complete request per operation; the service does not * bulk-complete by job. + * + *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link + * #operationType}) are debug-only echo information. The server does not key off them; they are + * preserved on log lines and traces so an operator looking at a failing complete call can see which + * (db, table, operation) the caller believed it was completing without joining back to the + * operation row. */ @Data @Builder @@ -21,8 +27,21 @@ @AllArgsConstructor public class CompleteOperationRequest { + /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ private HistoryStatus status; + + /** Debug echo: stable table identity the caller believed it was completing. */ + private String tableUuid; + + /** Debug echo: database name. */ + private String databaseName; + + /** Debug echo: table name. */ + private String tableName; + + /** Debug echo: operation type. */ + private OperationType operationType; } From db5eb2959a0fbbfba5d821ee36f00435248f9f5c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:53:16 -0700 Subject: [PATCH 29/55] refactor(optimizer): move application.properties out of optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every line in application.properties is run-time config (server.port, spring.application.name, actuator endpoints). optimizer-0 has no controllers and no endpoint to serve — the file is doing nothing here. The first PR that actually runs a web service is optimizer-2. Delete the file from this PR. optimizer-2 will re-introduce it alongside the REST controllers. The OptimizerServiceApplication @SpringBootApplication shell stays on this branch — optimizer-1's repository tests use @SpringBootTest and need an application class to discover. 
--- services/optimizer/src/main/resources/application.properties | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties deleted file mode 100644 index 64c40d1f2..000000000 --- a/services/optimizer/src/main/resources/application.properties +++ /dev/null @@ -1,5 +0,0 @@ -spring.application.name=openhouse-optimizer-service -server.port=8080 - -management.endpoints.web.exposure.include=health,prometheus -management.endpoint.health.enabled=true From ac3abc06fec4b3cb1433649e16536a3e0008a4a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:04:51 -0700 Subject: [PATCH 30/55] feat(optimizer): introduce db/ layer with per-layer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit optimizer-0 retired entity/, the schema, JPA/MySQL deps, and the api/mapper. This PR brings the DB layer back as db/ with its own self-contained types and a model↔db boundary mapper. db/ package: - TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow — JPA entities (same field set as the pre-deletion entity/ versions, with two exceptions: enum fields on the operations rows are now typed db/-side enums via @Enumerated(STRING), and TableOperationsHistoryRow loses the jobId/result columns since they were removed from the wire on optimizer-0). - OperationType, OperationStatus, HistoryStatus — db-layer enums. - TableStats (+ inner SnapshotMetrics, CommitDelta) — db-layer JSON payload, mirrors the model/ + api/ counterparts in shape but is its own class. model/mapper/ModelDbMapper: - Translates between model/ domain objects and db/ rows. - Lives in model/ per the boundary rule (model/ owns conversions to both edges; api/, model/, db/ data types are self-contained). 
Repositories: imports switched to db/; find() and findLatestPerTable take typed db enums instead of String. Repository tests: builders pass typed db enums; remove jobId/result fields no longer on TableOperationsHistoryRow. Schema (db/optimizer-schema.sql): restored. table_operations_history no longer has job_id / result columns. The idx_toph_optype_uuid_completed index for findLatestPerTable is preserved. build.gradle: restore spring-boot-starter-data-jpa, hibernate-types, mysql-connector-java, h2 dependencies. application-test.properties: restored (H2 test datasource). --- services/optimizer/build.gradle | 4 + .../openhouse/optimizer/db/HistoryStatus.java | 11 + .../optimizer/db/OperationStatus.java | 13 + .../openhouse/optimizer/db/OperationType.java | 12 + .../db/TableOperationsHistoryRow.java | 69 +++++ .../optimizer/db/TableOperationsRow.java | 87 +++++++ .../openhouse/optimizer/db/TableStats.java | 55 ++++ .../optimizer/db/TableStatsHistoryRow.java | 63 +++++ .../openhouse/optimizer/db/TableStatsRow.java | 59 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 235 ++++++++++++++++++ .../TableOperationsHistoryRepository.java | 6 +- .../repository/TableOperationsRepository.java | 8 +- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 2 +- .../main/resources/db/optimizer-schema.sql | 54 ++++ .../TableOperationsHistoryRepositoryTest.java | 47 ++-- .../TableOperationsRepositoryTest.java | 44 ++-- .../TableStatsHistoryRepositoryTest.java | 4 +- .../repository/TableStatsRepositoryTest.java | 4 +- .../resources/application-test.properties | 12 + 20 files changed, 734 insertions(+), 57 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java create mode 
100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index 2de8fd5c7..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,7 +4,11 @@ plugins { } dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java new file mode 100644 index 000000000..94e573968 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -0,0 +1,11 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations_history}. + * + *

Self-contained: no references to api/ or model/ types. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java new file mode 100644 index 000000000..4e9161693 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations}. + * + *

Self-contained: no references to api/ or model/ types. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java new file mode 100644 index 000000000..3a896e415 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the operation types persisted in {@code table_operations.operation_type} and + * {@code table_operations_history.operation_type}. + * + *

Self-contained: no references to api/ or model/ types. JPA binds this via + * {@code @Enumerated(EnumType.STRING)}. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java new file mode 100644 index 000000000..2e1230181 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -0,0 +1,69 @@ +package com.linkedin.openhouse.optimizer.db; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed maintenance operation. + * + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating live-operations row, tying each history entry back to the operation cycle that + * produced it. Multiple runs of the same operation on the same table produce multiple rows. + * + *

Self-contained DB-layer type: enums are {@link OperationType} / {@link HistoryStatus} from the + * same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_completed_at", columnList = "completed_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating live-operations row. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Column(name = "completed_at", nullable = false) + private Instant completedAt; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private HistoryStatus status; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java new file mode 100644 index 000000000..9652214d3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -0,0 +1,87 @@ +package com.linkedin.openhouse.optimizer.db; + +import 
java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state. {@code table_uuid} is the stable identity for the table (survives renames; rotates on + * drop+recreate). The application enforces one active (PENDING / SCHEDULING / SCHEDULED) row per + * {@code (table_uuid, operation_type)} at a time. + * + *

Self-contained DB-layer type: enums are {@link OperationType} / {@link OperationStatus} from + * the same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, + * ensuring two scheduler instances can't both move the same row out of PENDING. 
Not managed by + * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works + * portably across MySQL and H2. + */ + @Column(name = "version") + private Long version; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java new file mode 100644 index 000000000..ceebb5ad5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} + * and {@code table_stats_history}. + * + *

Self-contained: no references to api/ or model/ types. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java new file mode 100644 index 000000000..2b7628de1 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -0,0 +1,63 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import 
lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + * + *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java new file mode 100644 index 000000000..950cf5327 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -0,0 +1,59 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; 
+import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + * + *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java new file mode 100644 index 000000000..f77773928 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -0,0 +1,235 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import 
com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import org.springframework.stereotype.Component; + +/** + * Converts between internal {@code model/} domain objects and database row entities. + * + *

The only place inside {@code model/} where {@code db/} types are referenced — this is the + * boundary at which the internal model meets the database layer. Pure data types under {@code + * model/} stay free of any DB-side imports. + * + *

Each layer carries its own per-layer enum + payload types. This mapper translates between + * model/-side and db/-side counterparts by name. + */ +@Component +public class ModelDbMapper { + + // --- TableOperationsRow <-> TableOperation --- + + public TableOperation toOperation(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .status(toModelOperationStatus(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + public TableOperationsRow toRow(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsRow.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toDbOperationType(op.getOperationType())) + .status(toDbOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .version(0L) + .build(); + } + + // --- TableOperationsHistoryRow <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(toModelHistoryStatus(row.getStatus())) + .build(); + } + + public TableOperationsHistoryRow toRow(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryRow.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + 
.operationType(toDbOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toDbHistoryStatus(history.getStatus())) + .build(); + } + + // --- TableStatsRow -> Table --- + + public Table toTable(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(toModelStats(row.getStats())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { + if (dbStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(dbStats.getSnapshot())) + .delta(toModelDelta(dbStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.builder() + .snapshot(toDbSnapshot(modelStats.getSnapshot())) + .delta(toDbDelta(modelStats.getDelta())) + .build(); + } + + public TableStatsHistoryRow toStatsHistoryRow( + String id, + String tableUuid, + String databaseName, + String tableName, + TableStats stats, + java.time.Instant recordedAt) { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(toDbStats(stats)) + .recordedAt(recordedAt) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { + return v == null ? 
null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( + OperationStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); + } + + public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( + TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + 
.numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( + TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index ba2ce35a8..5faf349e3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,7 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; @@ -36,5 +37,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable( + @Param("operationType") OperationType operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index c7a08cabc..e9bc1c8b3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -21,8 +23,8 @@ public interface TableOperationsRepository extends JpaRepository find( - @Param("operationType") String operationType, - @Param("status") String status, + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index aaa1b0050..6f9595275 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; import 
org.springframework.data.domain.Pageable; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 4215237bc..dbf1de0ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..92e79976b --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,54 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). +CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
+ PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan.
+ INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) +); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 436d08066..706ecd877 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.HistoryStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import java.util.UUID; @@ -27,38 +27,37 @@ void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); + String idOlder = UUID.randomUUID().toString(); + String idNewer = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idOlder) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) - .jobId("job-001") + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idNewer) 
.tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) - .jobId("job-002") - .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") + .status(HistoryStatus.FAILED) .build()); List rows = repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); - assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + assertThat(rows.get(0).getId()).isEqualTo(idNewer); + assertThat(rows.get(1).getId()).isEqualTo(idOlder); } @Test @@ -72,9 +71,9 @@ void findByTableUuid_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(now.plusSeconds(i)) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); } @@ -96,9 +95,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -106,9 +105,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) + .status(HistoryStatus.FAILED) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -116,18 +115,18 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(otherUuid) 
.databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); assertThat(forTarget.getCompletedAt()).isEqualTo(t2); - assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 2ca8dc61e..44a03ba9e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); List pending = - 
repository.find(null, OperationStatus.PENDING.name(), null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -123,8 +123,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 475196630..18241ce8d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static 
org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 240d512ef..e70704f51 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect 
+spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From e79eec7b01dd0890df975c3e3ac311f2ef2cc96c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:25:33 -0700 Subject: [PATCH 31/55] refactor(optimizer): split TableStats envelope into snapshot + delta columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer no longer mirrors the wire-side TableStats JSON envelope. Instead the two structurally-separate concepts inside it — point-in-time snapshot metrics and per-commit delta counters — are persisted as two independent JSON columns. Per-layer decoupling: the api/ envelope can evolve without forcing the DB column shape to change in lockstep. Tables and class names are unchanged: table_stats / table_stats_history on the SQL side; TableStatsRow / TableStatsHistoryRow on the Java side. Changes: - Delete db/TableStats (the envelope wrapper is no longer needed). - Add db/SnapshotMetrics (plain POJO; serialized into the `snapshot` JSON column). - Add db/CommitDeltaMetrics (plain POJO; serialized into the `delta` JSON column). - TableStatsRow: replace `stats: TableStats` with `snapshot: SnapshotMetrics` and `delta: CommitDeltaMetrics`. - TableStatsHistoryRow: same split. - Schema: replace `stats TEXT` with `snapshot TEXT` and `delta TEXT` on both tables. - ModelDbMapper: split/join at the boundary. New helpers `toDbSnapshot`, `toDbDelta`, `joinStats` translate between the single model-layer TableStats and the two DB columns. `toStatsHistoryRow` projects a TableStats into the two-column row. - Repository tests: build rows with the new two-field shape. 
--- .../optimizer/db/CommitDeltaMetrics.java | 21 +++++++ .../optimizer/db/SnapshotMetrics.java | 24 ++++++++ .../openhouse/optimizer/db/TableStats.java | 55 ------------------ .../optimizer/db/TableStatsHistoryRow.java | 18 +++--- .../openhouse/optimizer/db/TableStatsRow.java | 13 +++-- .../optimizer/model/mapper/ModelDbMapper.java | 57 ++++++++++--------- .../main/resources/db/optimizer-schema.sql | 6 +- .../TableStatsHistoryRepositoryTest.java | 26 ++++----- .../repository/TableStatsRepositoryTest.java | 51 ++++++----------- 9 files changed, 125 insertions(+), 146 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java new file mode 100644 index 000000000..8094d28b8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Per-commit incremental counters. Serialized as JSON into the {@code delta} column. 
*/ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommitDeltaMetrics { + + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java new file mode 100644 index 000000000..22d222172 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Point-in-time snapshot fields. Serialized as JSON into the {@code snapshot} column. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class SnapshotMetrics { + + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + + /** Total number of data files as of the latest snapshot — used for bin-packing. 
*/ + private Long numCurrentFiles; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java deleted file mode 100644 index ceebb5ad5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.linkedin.openhouse.optimizer.db; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} - * and {@code table_stats_history}. - * - *

Self-contained: no references to api/ or model/ types. - */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 2b7628de1..71c17b582 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -19,12 +19,12 @@ /** * Append-only record of per-commit stats reported by the Tables Service. * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. + *

Each Iceberg commit produces one row. Consumers can query this table to reconstruct change + * rates over arbitrary time windows. * - *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields at commit time) and {@link CommitDeltaMetrics} (per-commit + * counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -55,8 +55,12 @@ public class TableStatsHistoryRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Column(name = "recorded_at", nullable = false) private Instant recordedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 950cf5327..2566763ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,8 +22,9 @@ *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} + * (per-commit counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -46,8 +47,12 @@ public class TableStatsRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index f77773928..0ae9167e1 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.model.mapper; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; @@ -21,8 +23,9 @@ * boundary at which the internal model meets the database layer. Pure data types under {@code * model/} stay free of any DB-side imports. * - *

Each layer carries its own per-layer enum + payload types. This mapper translates between - * model/-side and db/-side counterparts by name. + *

Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side + * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this + * mapper joins / splits them at the boundary. */ @Component public class ModelDbMapper { @@ -106,30 +109,31 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(toModelStats(row.getStats())) + .stats(joinStats(row.getSnapshot(), row.getDelta())) .build(); } - // --- TableStats payload --- + // --- TableStats payload <-> (snapshot, delta) --- - public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { - if (dbStats == null) { + /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ + public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { return null; } return TableStats.builder() - .snapshot(toModelSnapshot(dbStats.getSnapshot())) - .delta(toModelDelta(dbStats.getDelta())) + .snapshot(toModelSnapshot(dbSnapshot)) + .delta(toModelDelta(dbDelta)) .build(); } - public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.db.TableStats.builder() - .snapshot(toDbSnapshot(modelStats.getSnapshot())) - .delta(toDbDelta(modelStats.getDelta())) - .build(); + /** Project the internal-model {@link TableStats#getSnapshot()} side. */ + public SnapshotMetrics toDbSnapshot(TableStats modelStats) { + return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); + } + + /** Project the internal-model {@link TableStats#getDelta()} side. */ + public CommitDeltaMetrics toDbDelta(TableStats modelStats) { + return modelStats == null ? 
null : toDbDelta(modelStats.getDelta()); } public TableStatsHistoryRow toStatsHistoryRow( @@ -144,7 +148,8 @@ public TableStatsHistoryRow toStatsHistoryRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats(toDbStats(stats)) + .snapshot(toDbSnapshot(stats)) + .delta(toDbDelta(stats)) .recordedAt(recordedAt) .build(); } @@ -177,10 +182,9 @@ public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(Histo return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); } - // --- TableStats inner classes --- + // --- inner-payload field copies --- - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { if (v == null) { return null; } @@ -193,12 +197,11 @@ private TableStats.SnapshotMetrics toModelSnapshot( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( - TableStats.SnapshotMetrics v) { + private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { if (v == null) { return null; } - return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + return SnapshotMetrics.builder() .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) @@ -207,8 +210,7 @@ private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnaps .build(); } - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { if (v == null) { return null; } @@ -220,12 +222,11 @@ private TableStats.CommitDelta toModelDelta( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( - TableStats.CommitDelta v) { + private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { if (v == null) { return 
null; } - return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + return CommitDeltaMetrics.builder() .numFilesAdded(v.getNumFilesAdded()) .numFilesDeleted(v.getNumFilesDeleted()) .addedSizeBytes(v.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 92e79976b..3f3d11629 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -19,7 +19,8 @@ CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) @@ -30,7 +31,8 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), INDEX idx_tsh_table_uuid (table_uuid), diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 18241ce8d..dbd8cc686 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import 
java.time.Instant; import java.time.temporal.ChronoUnit; @@ -35,8 +36,8 @@ void saveAndFind() { assertThat(rows).hasSize(3); // newest first - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); - assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getDelta().getNumFilesAdded()).isEqualTo(10L); } @Test @@ -67,7 +68,7 @@ void find_withSince_filtersOlderRows() { // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); } @Test @@ -131,18 +132,11 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats( - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder() - .clusterId("cl1") - .tableSizeBytes(1024L) - .build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(numFilesAdded) - .numFilesDeleted(numFilesDeleted) - .build()) + .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta( + CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) .build()) .recordedAt(recordedAt) .build(); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index e70704f51..5f6a4ef4f 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import 
com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; @@ -24,19 +25,18 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - TableStats stats = - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) - .build(); + SnapshotMetrics snapshot = + SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + CommitDeltaMetrics delta = + CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats(stats) + .snapshot(snapshot) + .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -44,7 +44,8 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); - assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } @@ -58,10 +59,7 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) 
.updatedAt(Instant.now()) .build()); @@ -70,15 +68,12 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); assertThat(repository.findAll()).hasSize(1); - assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + assertThat(repository.findById(tableUuid).get().getSnapshot().getTableSizeBytes()) .isEqualTo(200L); } @@ -89,10 +84,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -100,10 +92,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); @@ -117,10 +106,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -128,10 +114,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - 
.build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); From f955ded61892180eefdc562ecc48a0b5cbffa391 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:34:25 -0700 Subject: [PATCH 32/55] fix(optimizer): drop CommitDeltaMetrics from TableStatsRow table_stats is the current-state row (one per table). Per-commit deltas are an append-only history concern and belong only to TableStatsHistoryRow. Storing a delta on the current-state row implied an aggregation that isn't actually performed. - TableStatsRow: remove the `delta` field. - table_stats schema: drop the `delta` column. - ModelDbMapper.toTable: project only snapshot to model.TableStats; history-only deltas remain in TableStatsHistoryRow. - TableStatsRepositoryTest: drop .delta(...) builder usage. --- .../linkedin/openhouse/optimizer/db/TableStatsRow.java | 9 ++------- .../openhouse/optimizer/model/mapper/ModelDbMapper.java | 3 ++- .../optimizer/src/main/resources/db/optimizer-schema.sql | 1 - .../optimizer/repository/TableStatsRepositoryTest.java | 5 ----- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 2566763ce..8d869ff1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,9 +22,8 @@ *

<p>Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

<p>Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link - * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} - * (per-commit counters). + *

Self-contained DB-layer type. Holds only the point-in-time {@link SnapshotMetrics} — + * per-commit deltas live exclusively on {@link TableStatsHistoryRow} and are not aggregated here. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -50,10 +49,6 @@ public class TableStatsRow { @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; - @Type(type = "json") - @Column(name = "delta", columnDefinition = "TEXT") - private CommitDeltaMetrics delta; - @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 0ae9167e1..755b38400 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -109,7 +109,8 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) + // table_stats holds only the snapshot — deltas live on the history table. 
+ .stats(joinStats(row.getSnapshot(), null)) .build(); } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 3f3d11629..24b367549 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -20,7 +20,6 @@ CREATE TABLE IF NOT EXISTS table_stats ( database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, snapshot TEXT, - delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5f6a4ef4f..493eb88b6 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; @@ -27,8 +26,6 @@ void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); SnapshotMetrics snapshot = SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); - CommitDeltaMetrics delta = - CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() @@ -36,7 +33,6 @@ void saveAndFindById() { .databaseName("db1") .tableName("tbl1") .snapshot(snapshot) - .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -45,7 +41,6 @@ void saveAndFindById() { 
assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); - assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } From 861b584c3cd41ff03db336c85cb0cde4bc063fe4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:55:57 -0700 Subject: [PATCH 33/55] feat(optimizer): extend model layer for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare model/ for a service-layer rewrite that returns only model/ types (no api/ DTO leakage into the service interface). - model/Table: add `Instant updatedAt`. The service stamps it on every upsert; controllers read it when assembling the wire DTO. - model/TableStatsHistory: new internal-model counterpart to db.TableStatsHistoryRow. Fields mirror the row in internal types (id, tableUuid, databaseName, tableName, stats, recordedAt). - ApiModelMapper: add the missing api↔model conversions that controllers will own once the service drops api/ knowledge — Table ↔ TableStatsDto, TableStatsHistory ↔ TableStatsHistoryDto, and toTable(tableUuid, UpsertTableStatsRequest). 
--- .../openhouse/optimizer/model/Table.java | 4 ++ .../optimizer/model/TableStatsHistory.java | 33 +++++++++++ .../model/mapper/ApiModelMapper.java | 58 +++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index c8bede225..dc0a16a0c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import java.time.Instant; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -28,4 +29,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; + + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java new file mode 100644 index 000000000..5cdad1918 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of an append-only per-commit stats history record. + * + *

<p>One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit + * delta — consumers can reconstruct change rates over arbitrary time windows. + * + *

Pure internal-model type — no references to wire-API or DB types. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistory { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + + /** Snapshot + delta for this commit event. */ + private TableStats stats; + + /** When this history row was recorded. */ + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 35af7fb25..d77b3a253 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -2,12 +2,18 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import java.util.Collections; import org.springframework.stereotype.Component; /** @@ -89,6 +95,58 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .build(); } + // --- Table <-> TableStatsDto / 
UpsertTableStatsRequest --- + + /** + * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} + * is intentionally left null — the service stamps it server-side at write time. + */ + public Table toTable(String tableUuid, UpsertTableStatsRequest request) { + if (request == null) { + return null; + } + return Table.builder() + .tableUuid(tableUuid) + .databaseName(request.getDatabaseName()) + .tableId(request.getTableName()) + .tableProperties( + request.getTableProperties() != null + ? request.getTableProperties() + : Collections.emptyMap()) + .stats(toModelStats(request.getStats())) + .build(); + } + + public TableStatsDto toDto(Table table) { + if (table == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .stats(toApiStats(table.getStats())) + .tableProperties(table.getTableProperties()) + .updatedAt(table.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistory <-> TableStatsHistoryDto --- + + public TableStatsHistoryDto toDto(TableStatsHistory history) { + if (history == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .stats(toApiStats(history.getStats())) + .recordedAt(history.getRecordedAt()) + .build(); + } + // --- TableStats payload --- public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { From b60a3bfc1d51a6f60ac42baba3669bf90e71683f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:00:13 -0700 Subject: [PATCH 34/55] feat(optimizer): extend ModelDbMapper for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round out the model↔db boundary for the upcoming service-layer rewrite that returns only internal-model 
types: - toTable: stamp model.Table.updatedAt from the row's updated_at column so the model carries the freshness needed by callers without leaking the row. - toStatsHistory: new — db.TableStatsHistoryRow → model.TableStatsHistory. Joins the row's snapshot + delta columns into the model's single TableStats payload. --- .../optimizer/model/mapper/ModelDbMapper.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 755b38400..7a454c78c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -13,6 +13,7 @@ import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.util.Collections; import org.springframework.stereotype.Component; @@ -111,6 +112,23 @@ public Table toTable(TableStatsRow row) { row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) // table_stats holds only the snapshot — deltas live on the history table. 
.stats(joinStats(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistoryRow -> TableStatsHistory --- + + public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(joinStats(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) .build(); } From 25d98aaacc7ffd4c506b1f43a896210725b83f9a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:10:16 -0700 Subject: [PATCH 35/55] feat(optimizer): restore batch CAS methods on TableOperationsRepository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimizer-1 db/ rewrite accidentally dropped the batch CAS helpers used by the scheduler. Restore them with db/-typed enum parameters and JPQL queries that compare against fully-qualified db.OperationStatus constants. - markSchedulingBatch(ids, scheduledAt): PENDING → SCHEDULING. - markScheduledBatch(ids, jobId): SCHEDULING → SCHEDULED. - markPendingBatch(ids): SCHEDULING → PENDING (job-launch failure retry). - cancelDuplicatePendingBatch(operationType, keepIds): drop dupe PENDING rows for an operation type, keeping the supplied IDs. 
--- .../repository/TableOperationsRepository.java | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index e9bc1c8b3..962a108a2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -3,8 +3,10 @@ import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import java.time.Instant; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -28,4 +30,59 @@ List find( @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); + + /** + * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of + * rows transitioned. Rows already claimed by another instance are skipped silently; callers must + * re-query if they need the precise list. 
+ */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," + + " r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int markSchedulingBatch( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + + /** + * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in + * SCHEDULING. Returns the number of rows transitioned. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," + + " r.jobId = :jobId, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + + /** + * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the + * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns + * the number of rows reverted. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," + + " r.scheduledAt = NULL, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markPendingBatch(@Param("ids") List ids); + + /** + * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in + * {@code keepIds}. Used by the scheduler to deduplicate before claiming. 
+ */ + @Modifying + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.operationType = :operationType " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " + + "AND r.id NOT IN :keepIds") + int cancelDuplicatePendingBatch( + @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); } From 188713d7479b0d1c0425b9e753e0da2df25915b4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:20:50 -0700 Subject: [PATCH 36/55] docs(optimizer): comment every field on opt-0 api/ and model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several fields under api/model/ and model/ were left undocumented in the earlier per-layer-types passes. Audit + fill them in: api/model/TableOperationsHistoryDto: databaseName, tableName, operationType — add display/role docs. api/model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. api/model/TableStats inner classes: - SnapshotMetrics: clusterId, tableVersion, tableLocation, tableSizeBytes — add field docs. - CommitDelta: numFilesAdded, numFilesDeleted, addedSizeBytes, deletedSizeBytes — add field docs. model/Table: tableUuid, databaseName, tableId, tableProperties, stats — add field docs. model/TableStats: same field-doc additions on SnapshotMetrics and CommitDelta as the api/ counterpart. model/OperationStatus: PENDING, SCHEDULING, SCHEDULED, CANCELED — add enum-value docs. model/OperationType: ORPHAN_FILES_DELETION — add enum-value doc. model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. model/TableStatsHistory: id, tableUuid, databaseName, tableName — add field docs. 
--- .../optimizer/api/model/HistoryStatus.java | 4 ++++ .../api/model/TableOperationsHistoryDto.java | 5 +++++ .../optimizer/api/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/HistoryStatus.java | 4 ++++ .../optimizer/model/OperationStatus.java | 8 ++++++++ .../optimizer/model/OperationType.java | 2 ++ .../openhouse/optimizer/model/Table.java | 7 +++++++ .../openhouse/optimizer/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/TableStatsHistory.java | 7 +++++++ 9 files changed, 71 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 2fbcf6235..dc52f863e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -2,6 +2,10 @@ /** Terminal states for a completed Spark maintenance job. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index d9fa1f387..4e247c7ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -19,8 +19,13 @@ public class TableOperationsHistoryDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. 
*/ private String tableName; + + /** The type of maintenance operation this history row records. */ private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index de268ffe7..dcb360330 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -32,10 +32,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -47,9 +56,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index d29c88719..97b8e2992 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -8,6 +8,10 @@ *

Intentionally separate from the wire-API and DB representations. */ public enum HistoryStatus { + + /** The operation completed successfully. */ SUCCESS, + + /** The operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index 66f213c73..f284fedaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -8,8 +8,16 @@ *

Intentionally separate from the wire-API and DB representations. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index bea44018b..8f4fe35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -6,5 +6,7 @@ * supported operations without churning either boundary. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index dc0a16a0c..bca7e2420 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -22,12 +22,19 @@ @AllArgsConstructor public class Table { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; + + /** Database the table lives in. */ private String databaseName; + + /** Iceberg table identifier (table name, not UUID). */ private String tableId; + /** Current table-property map (e.g. maintenance opt-in flags). Never null. 
*/ @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ private TableStats stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 3b56196ea..94d0a1655 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -27,10 +27,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -42,9 +51,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 5cdad1918..53bb54d1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -20,9 +20,16 @@ @AllArgsConstructor public class TableStatsHistory { + /** UUID primary key — set by the caller, not generated server-side. */ private String id; + + /** Stable table identity from the Tables Service. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; /** Snapshot + delta for this commit event. */ From 8d642732244b002f1f7926ae81e98b27f95b1881 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:28:57 -0700 Subject: [PATCH 37/55] refactor(optimizer): remove clusterId from SnapshotMetrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clusterId is per-table-immutable in OpenHouse — it never changes after the table is created — so persisting and transmitting it on every snapshot is dead weight. Remove from the wire and internal representations. - api/model/TableStats.SnapshotMetrics: drop clusterId. - model/TableStats.SnapshotMetrics: drop clusterId. - model/mapper/ApiModelMapper: drop the clusterId hop in toModelSnapshot and toApiSnapshot. 
--- .../com/linkedin/openhouse/optimizer/api/model/TableStats.java | 3 --- .../com/linkedin/openhouse/optimizer/model/TableStats.java | 3 --- .../openhouse/optimizer/model/mapper/ApiModelMapper.java | 2 -- 3 files changed, 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index dcb360330..096eecd1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -33,9 +33,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 94d0a1655..56291e510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -28,9 +28,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. 
*/ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index d77b3a253..31141ff44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -215,7 +215,6 @@ private TableStats.SnapshotMetrics toModelSnapshot( return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(apiValue.getClusterId()) .tableVersion(apiValue.getTableVersion()) .tableLocation(apiValue.getTableLocation()) .tableSizeBytes(apiValue.getTableSizeBytes()) @@ -229,7 +228,6 @@ private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics to return null; } return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .clusterId(modelValue.getClusterId()) .tableVersion(modelValue.getTableVersion()) .tableLocation(modelValue.getTableLocation()) .tableSizeBytes(modelValue.getTableSizeBytes()) From c1ad24615aaae6dd5c5012f2fdd591f8c0c01712 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:34:17 -0700 Subject: [PATCH 38/55] refactor(optimizer): comment every db/ field; drop clusterId and version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cleanups on the DB layer, plus a doc audit. clusterId removal: - db/SnapshotMetrics: drop clusterId. - model/mapper/ModelDbMapper: drop clusterId from toModelSnapshot and toDbSnapshot. - Repository tests: drop .clusterId("cl1") from builders. (The api/ and model/ copies were retired in the prior optimizer-0 commit; this completes the removal at the db edge.) version removal: - db/TableOperationsRow: drop the `version` field. 
The batch CAS pattern's atomicity comes from filtering on `status` (PENDING → SCHEDULING is unambiguous on status alone); the version bump was decorative. - table_operations schema: drop the `version BIGINT` column. - TableOperationsRepository: remove `r.version = r.version + 1` from markSchedulingBatch / markScheduledBatch / markPendingBatch query strings. - model/mapper/ModelDbMapper.toRow: stop initializing version on the row builder. Doc audit on db/: - db/SnapshotMetrics, db/CommitDeltaMetrics: doc every field. - db/HistoryStatus, db/OperationStatus, db/OperationType: doc every enum value. - db/TableOperationsRow, db/TableOperationsHistoryRow, db/TableStatsRow, db/TableStatsHistoryRow: doc every field. --- .../optimizer/db/CommitDeltaMetrics.java | 7 +++++++ .../openhouse/optimizer/db/HistoryStatus.java | 4 ++++ .../optimizer/db/OperationStatus.java | 8 ++++++++ .../openhouse/optimizer/db/OperationType.java | 2 ++ .../optimizer/db/SnapshotMetrics.java | 6 +++++- .../db/TableOperationsHistoryRow.java | 6 ++++++ .../optimizer/db/TableOperationsRow.java | 18 ++++++++---------- .../optimizer/db/TableStatsHistoryRow.java | 7 +++++++ .../openhouse/optimizer/db/TableStatsRow.java | 5 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 3 --- .../repository/TableOperationsRepository.java | 6 +++--- .../src/main/resources/db/optimizer-schema.sql | 1 - .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 3 +-- 14 files changed, 57 insertions(+), 21 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java index 8094d28b8..5a30c9afd 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -14,8 +14,15 @@ 
@JsonIgnoreProperties(ignoreUnknown = true) public class CommitDeltaMetrics { + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. */ private Long deletedSizeBytes; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java index 94e573968..3680735f4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -6,6 +6,10 @@ *

Self-contained: no references to api/ or model/ types. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java index 4e9161693..0a2e07483 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -6,8 +6,16 @@ *

Self-contained: no references to api/ or model/ types. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java index 3a896e415..e4caf549b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -8,5 +8,7 @@ * {@code @Enumerated(EnumType.STRING)}. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java index 22d222172..452b35097 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -14,9 +14,13 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class SnapshotMetrics { - private String clusterId; + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; /** Total number of data files as of the latest snapshot — used for bin-packing. 
*/ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java index 2e1230181..5f4a598d9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -47,22 +47,28 @@ public class TableOperationsHistoryRow { @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this history row records. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. 
*/ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private HistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java index 9652214d3..dfe40d402 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -44,44 +44,42 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableOperationsRow { + /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this row recommends. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** Lifecycle state — drives the scheduler's CAS claim and the analyzer's eligibility check. */ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private OperationStatus status; + /** When the analyzer first created this row. Set on insert; never updated. */ @Column(name = "created_at", nullable = false) private Instant createdAt; + /** When the scheduler last claimed this row. {@code null} while {@code PENDING}. 
*/ @Column(name = "scheduled_at") private Instant scheduledAt; /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ @Column(name = "job_id", length = 255) private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, - * ensuring two scheduler instances can't both move the same row out of PENDING. Not managed by - * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works - * portably across MySQL and H2. - */ - @Column(name = "version") - private Long version; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 71c17b582..4eaee2a6f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -41,27 +41,34 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsHistoryRow { + /** UUID primary key — set by the caller, not generated server-side. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable Iceberg table UUID. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Snapshot fields at commit time. Stored as a JSON blob in the {@code snapshot} column. 
*/ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Per-commit delta counters. Stored as a JSON blob in the {@code delta} column. */ @Type(type = "json") @Column(name = "delta", columnDefinition = "TEXT") private CommitDeltaMetrics delta; + /** When this history row was recorded (commit time). */ @Column(name = "recorded_at", nullable = false) private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 8d869ff1e..165247b6a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -35,20 +35,25 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { + /** Stable Iceberg table UUID. Primary key. */ @Id @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Latest snapshot fields. Stored as a JSON blob in the {@code snapshot} column. */ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Current table-property map (e.g. maintenance opt-in flags). Stored as JSON. 
*/ @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 7a454c78c..59d7e8680 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -62,7 +62,6 @@ public TableOperationsRow toRow(TableOperation op) { .status(toDbOperationStatus(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) - .version(0L) .build(); } @@ -208,7 +207,6 @@ private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) @@ -221,7 +219,6 @@ private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { return null; } return SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 962a108a2..8baddfe42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -40,7 +40,7 @@ List find( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " 
r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + " r.scheduledAt = :scheduledAt " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") int markSchedulingBatch( @@ -54,7 +54,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId, r.version = r.version + 1 " + + " r.jobId = :jobId " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); @@ -68,7 +68,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL, r.version = r.version + 1 " + + " r.scheduledAt = NULL " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markPendingBatch(@Param("ids") List ids); diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 24b367549..892c1c55f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -10,7 +10,6 @@ CREATE TABLE IF NOT EXISTS table_operations ( created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), - version BIGINT, -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index dbd8cc686..536b72e35 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -132,7 +132,7 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .delta( CommitDeltaMetrics.builder() .numFilesAdded(numFilesAdded) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 493eb88b6..f9cc28d57 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -24,8 +24,7 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - SnapshotMetrics snapshot = - SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + SnapshotMetrics snapshot = SnapshotMetrics.builder().tableSizeBytes(1024L).build(); repository.save( TableStatsRow.builder() From c72aae8ed9e324591b88cf54f993400370f087b3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:58:31 -0700 Subject: [PATCH 39/55] =?UTF-8?q?refactor(optimizer):=20move=20api?= 
=?UTF-8?q?=E2=86=94model=20conversion=20onto=20api=20types;=20delete=20Ap?= =?UTF-8?q?iModelMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the api/model boundary mapper with conversion methods on the types themselves. The api layer now imports model/ directly via to/from methods — controllers and other api-edge callers no longer inject a mapper bean. The dependency direction is a strict downward chain: api → model → db api types know about model types (and call model methods); model types know about db types (next round). db remains import-free. No central mapper, no risk of a cycle through a hub class. api/model/* changes (each gets a `toModel()` instance method + a static `fromModel(...)` factory): - TableOperationsDto ↔ model.TableOperation. - TableOperationsHistoryDto ↔ model.TableOperationsHistory. - TableStatsDto ↔ model.Table. - TableStatsHistoryDto ↔ model.TableStatsHistory. - UpsertTableStatsRequest → model.Table (one-way; takes the path-var tableUuid; updatedAt is server-stamped). - TableStats (+ SnapshotMetrics + CommitDelta inner) ↔ model.TableStats. - OperationType / OperationStatus / HistoryStatus (api enums) ↔ model enums. CompleteOperationRequest keeps its fields plain — callers extract `operationId` and `status.toModel()` directly; no wrapper needed. Delete services/optimizer/.../model/mapper/ApiModelMapper.java. 
--- .../optimizer/api/model/HistoryStatus.java | 12 +- .../optimizer/api/model/OperationStatus.java | 13 +- .../optimizer/api/model/OperationType.java | 12 +- .../api/model/TableOperationsDto.java | 32 +++ .../api/model/TableOperationsHistoryDto.java | 30 ++ .../optimizer/api/model/TableStats.java | 67 +++++ .../optimizer/api/model/TableStatsDto.java | 29 ++ .../api/model/TableStatsHistoryDto.java | 28 ++ .../api/model/UpsertTableStatsRequest.java | 17 ++ .../model/mapper/ApiModelMapper.java | 263 ------------------ 10 files changed, 237 insertions(+), 266 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index dc52f863e..0c9ff95da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -7,5 +7,15 @@ public enum HistoryStatus { SUCCESS, /** The Spark job for this operation failed. */ - FAILED + FAILED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java index c97be441b..300c28263 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -17,5 +17,16 @@ public enum OperationStatus { * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED * before the claim step. */ - CANCELED + CANCELED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 8507bae12..5f325e712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -3,5 +3,15 @@ /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.OperationType toModel() { + return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index d41bd6906..db8ef1039 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -39,4 +40,35 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; + + /** Convert to the internal-model counterpart. */ + public TableOperation toModel() { + return TableOperation.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .status(status == null ? null : status.toModel()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsDto fromModel(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 4e247c7ce..935435040 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -33,4 +34,33 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; + + /** Convert to the internal-model counterpart. */ + public TableOperationsHistory toModel() { + return TableOperationsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .completedAt(completedAt) + .status(status == null ? null : status.toModel()) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + if (h == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .operationType(OperationType.fromModel(h.getOperationType())) + .completedAt(h.getCompletedAt()) + .status(HistoryStatus.fromModel(h.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 096eecd1e..c75d21d75 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -25,6 +25,25 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.builder() + .snapshot(snapshot == null ? null : snapshot.toModel()) + .delta(delta == null ? null : delta.toModel()) + .build(); + } + + /** Build the api-layer payload from the internal-model counterpart. */ + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(CommitDelta.fromModel(m.getDelta())) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -44,6 +63,30 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. */ + public static SnapshotMetrics fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + if (m == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(m.getTableVersion()) + .tableLocation(m.getTableLocation()) + .tableSizeBytes(m.getTableSizeBytes()) + .numCurrentFiles(m.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -65,5 +108,29 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. 
*/ + public static CommitDelta fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + if (m == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(m.getNumFilesAdded()) + .numFilesDeleted(m.getNumFilesDeleted()) + .addedSizeBytes(m.getAddedSizeBytes()) + .deletedSizeBytes(m.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..82dc552c2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -31,4 +33,31 @@ public class TableStatsDto { /** When this row was last written. Used for staleness monitoring. */ private Instant updatedAt; + + /** Convert to the internal-model counterpart. */ + public Table toModel() { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? null : stats.toModel()) + .updatedAt(updatedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsDto fromModel(Table t) { + if (t == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(t.getTableUuid()) + .databaseName(t.getDatabaseName()) + .tableName(t.getTableId()) + .stats(TableStats.fromModel(t.getStats())) + .tableProperties(t.getTableProperties()) + .updatedAt(t.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..b5f971bbf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -30,4 +31,31 @@ public class TableStatsHistoryDto { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the internal-model counterpart. */ + public TableStatsHistory toModel() { + return TableStatsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(stats == null ? null : stats.toModel()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + if (h == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .stats(TableStats.fromModel(h.getStats())) + .recordedAt(h.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..13476543f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -29,4 +31,19 @@ public class UpsertTableStatsRequest { /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** + * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from + * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service + * stamps it server-side at write time. + */ + public Table toModel(String tableUuid) { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? 
null : stats.toModel()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java deleted file mode 100644 index 31141ff44..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ /dev/null @@ -1,263 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between wire-API DTOs and internal {@code model/} domain objects. - * - *

The only place inside {@code model/} where {@code api/} types are referenced — this is the - * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} - * stay free of any api-side imports. - * - *

API-layer enums + payloads are intentionally separate Java types from the internal-model - * counterparts; the two sides evolve independently. This mapper translates by name. - */ -@Component -public class ApiModelMapper { - - // --- TableOperationsDto <-> TableOperation --- - - public TableOperation toOperation(TableOperationsDto dto) { - if (dto == null) { - return null; - } - return TableOperation.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .status(toModelOperationStatus(dto.getStatus())) - .createdAt(dto.getCreatedAt()) - .scheduledAt(dto.getScheduledAt()) - .build(); - } - - public TableOperationsDto toDto(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsDto.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toApiOperationType(op.getOperationType())) - .status(toApiOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryDto <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { - if (dto == null) { - return null; - } - return TableOperationsHistory.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .completedAt(dto.getCompletedAt()) - .status(toModelHistoryStatus(dto.getStatus())) - .build(); - } - - public TableOperationsHistoryDto toDto(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - 
.tableName(history.getTableName()) - .operationType(toApiOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toApiHistoryStatus(history.getStatus())) - .build(); - } - - // --- Table <-> TableStatsDto / UpsertTableStatsRequest --- - - /** - * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} - * is intentionally left null — the service stamps it server-side at write time. - */ - public Table toTable(String tableUuid, UpsertTableStatsRequest request) { - if (request == null) { - return null; - } - return Table.builder() - .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableId(request.getTableName()) - .tableProperties( - request.getTableProperties() != null - ? request.getTableProperties() - : Collections.emptyMap()) - .stats(toModelStats(request.getStats())) - .build(); - } - - public TableStatsDto toDto(Table table) { - if (table == null) { - return null; - } - return TableStatsDto.builder() - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .stats(toApiStats(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(table.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistory <-> TableStatsHistoryDto --- - - public TableStatsHistoryDto toDto(TableStatsHistory history) { - if (history == null) { - return null; - } - return TableStatsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .stats(toApiStats(history.getStats())) - .recordedAt(history.getRecordedAt()) - .build(); - } - - // --- TableStats payload --- - - public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { - if (apiStats == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(apiStats.getSnapshot())) - 
.delta(toModelDelta(apiStats.getDelta())) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() - .snapshot(toApiSnapshot(modelStats.getSnapshot())) - .delta(toApiDelta(modelStats.getDelta())) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType( - com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { - return apiValue == null ? null : OperationType.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( - OperationType modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { - return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( - OperationStatus modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); - } - - public HistoryStatus toModelHistoryStatus( - com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { - return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( - HistoryStatus modelValue) { - return modelValue == null - ? 
null - : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); - } - - // --- TableStats inner classes --- - - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(apiValue.getTableVersion()) - .tableLocation(apiValue.getTableLocation()) - .tableSizeBytes(apiValue.getTableSizeBytes()) - .numCurrentFiles(apiValue.getNumCurrentFiles()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( - TableStats.SnapshotMetrics modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .tableVersion(modelValue.getTableVersion()) - .tableLocation(modelValue.getTableLocation()) - .tableSizeBytes(modelValue.getTableSizeBytes()) - .numCurrentFiles(modelValue.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(apiValue.getNumFilesAdded()) - .numFilesDeleted(apiValue.getNumFilesDeleted()) - .addedSizeBytes(apiValue.getAddedSizeBytes()) - .deletedSizeBytes(apiValue.getDeletedSizeBytes()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( - TableStats.CommitDelta modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() - .numFilesAdded(modelValue.getNumFilesAdded()) - .numFilesDeleted(modelValue.getNumFilesDeleted()) - .addedSizeBytes(modelValue.getAddedSizeBytes()) - .deletedSizeBytes(modelValue.getDeletedSizeBytes()) - .build(); - } -} From 
8ae8777422a940e3b730ede226f8801db5618619 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:02:28 -0700 Subject: [PATCH 40/55] =?UTF-8?q?refactor(optimizer):=20move=20model?= =?UTF-8?q?=E2=86=94db=20conversion=20onto=20model=20types;=20delete=20Mod?= =?UTF-8?q?elDbMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the model/db boundary mapper with conversion methods on the model types themselves. Same pattern that opt-0 just applied at the api↔model boundary — each layer's type carries the to/from methods for the layer below. The dependency chain after this commit: api → model → db api/* → model/* (added on opt-0). model/* → db/* (this commit). db/* still imports nothing — bottom of the chain. model/* changes (each gets a `toRow()` instance method + a static `fromRow(...)` factory): - Table ↔ db.TableStatsRow (current-state row; snapshot only, delta lives on history rows). - TableOperation ↔ db.TableOperationsRow. - TableOperationsHistory ↔ db.TableOperationsHistoryRow. - TableStatsHistory ↔ db.TableStatsHistoryRow (joins/splits the snapshot + delta columns). - TableStats inner: SnapshotMetrics ↔ db.SnapshotMetrics, CommitDelta ↔ db.CommitDeltaMetrics. TableStats itself exposes toSnapshotRow() / toDeltaRow() for the split-write side and a static fromRows(snapshot, delta) for the join-read side. - OperationType / OperationStatus / HistoryStatus (model enums) ↔ db enums. Delete services/optimizer/.../model/mapper/ModelDbMapper.java. 
--- .../optimizer/model/HistoryStatus.java | 12 +- .../optimizer/model/OperationStatus.java | 12 +- .../optimizer/model/OperationType.java | 12 +- .../openhouse/optimizer/model/Table.java | 37 ++- .../optimizer/model/TableOperation.java | 37 ++- .../model/TableOperationsHistory.java | 30 +++ .../openhouse/optimizer/model/TableStats.java | 71 +++++ .../optimizer/model/TableStatsHistory.java | 31 ++- .../optimizer/model/mapper/ModelDbMapper.java | 252 ------------------ 9 files changed, 232 insertions(+), 262 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index 97b8e2992..e6321873d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -13,5 +13,15 @@ public enum HistoryStatus { SUCCESS, /** The operation failed. */ - FAILED + FAILED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDb() { + return com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static HistoryStatus fromDb(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index f284fedaf..137d97902 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -19,5 +19,15 @@ public enum OperationStatus { SCHEDULED, /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ - CANCELED + CANCELED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationStatus toDb() { + return com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static OperationStatus fromDb(com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index 8f4fe35a8..13c7e9c61 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -8,5 +8,15 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationType toDb() { + return com.linkedin.openhouse.optimizer.db.OperationType.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. 
*/ + public static OperationType fromDb(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..659dd18da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -13,8 +14,8 @@ * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). * - *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link - * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. + *

Dependencies run one way, model → db: this type owns both directions of model↔db conversion; + * api↔model conversion lives on the api types. db/ types know nothing about model/ or api/. */ @Data @Builder @@ -39,4 +40,36 @@ public class Table { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + + /** + * Project to the current-state DB row. {@code table_stats} carries the snapshot only — per-commit + * deltas live on {@code table_stats_history} (see {@link TableStatsHistory#toRow()}). + */ + public TableStatsRow toRow() { + return TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableId) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .tableProperties(tableProperties) + .updatedAt(updatedAt) + .build(); + } + + /** Build a {@link Table} from a current-state DB row. */ + public static Table fromRow(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + // table_stats holds only the snapshot — deltas live on the history table.
+ .stats(TableStats.fromRows(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..81f97f1de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -12,9 +13,8 @@ * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks * up and submits. * - *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction - * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or - * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). + *

Dependencies run one way, model → db: this type owns both directions of model↔db conversion; + * api↔model conversion lives on the api types. db/ types know nothing about model/ or api/. * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -75,4 +75,35 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } + + /** Convert to the corresponding DB row. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .status(status == null ? null : status.toDb()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a {@link TableOperation} from a DB row. */ + public static TableOperation fromRow(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .status(OperationStatus.fromDb(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..42a48479a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; 
import lombok.Builder; @@ -38,4 +39,33 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; + + /** Convert to the corresponding DB row. */ + public TableOperationsHistoryRow toRow() { + return TableOperationsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .completedAt(completedAt) + .status(status == null ? null : status.toDb()) + .build(); + } + + /** Build a {@link TableOperationsHistory} from a DB row. */ + public static TableOperationsHistory fromRow(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(HistoryStatus.fromDb(row.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..212390af9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -20,6 +20,31 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { + return snapshot == null ? null : snapshot.toDb(); + } + + /** + * Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics} object. 
+ */ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDeltaRow() { + return delta == null ? null : delta.toDb(); + } + + /** Join the two DB-side columns back into a single internal-model {@link TableStats}. */ + public static TableStats fromRows( + com.linkedin.openhouse.optimizer.db.SnapshotMetrics dbSnapshot, + com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromDb(dbSnapshot)) + .delta(CommitDelta.fromDb(dbDelta)) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -39,6 +64,29 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static SnapshotMetrics fromDb(com.linkedin.openhouse.optimizer.db.SnapshotMetrics v) { + if (v == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -60,5 +108,28 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the DB-layer counterpart. 
*/ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static CommitDelta fromDb(com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics v) { + if (v == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 53bb54d1e..f7f111151 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -11,8 +12,6 @@ * *

One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit * delta — consumers can reconstruct change rates over arbitrary time windows. - * - *

Pure internal-model type — no references to wire-API or DB types. */ @Data @Builder @@ -37,4 +36,32 @@ public class TableStatsHistory { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the corresponding DB row. */ + public TableStatsHistoryRow toRow() { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .delta(stats == null ? null : stats.toDeltaRow()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a {@link TableStatsHistory} from a DB row. */ + public static TableStatsHistory fromRow(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(TableStats.fromRows(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java deleted file mode 100644 index 59d7e8680..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ /dev/null @@ -1,252 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; -import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; -import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableOperationsRow; -import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between internal {@code model/} domain objects and database row entities. - * - *

The only place inside {@code model/} where {@code db/} types are referenced — this is the - * boundary at which the internal model meets the database layer. Pure data types under {@code - * model/} stay free of any DB-side imports. - * - *

Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side - * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this - * mapper joins / splits them at the boundary. - */ -@Component -public class ModelDbMapper { - - // --- TableOperationsRow <-> TableOperation --- - - public TableOperation toOperation(TableOperationsRow row) { - if (row == null) { - return null; - } - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .status(toModelOperationStatus(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - - public TableOperationsRow toRow(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsRow.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toDbOperationType(op.getOperationType())) - .status(toDbOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryRow <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { - if (row == null) { - return null; - } - return TableOperationsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .completedAt(row.getCompletedAt()) - .status(toModelHistoryStatus(row.getStatus())) - .build(); - } - - public TableOperationsHistoryRow toRow(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryRow.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - 
.databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .operationType(toDbOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toDbHistoryStatus(history.getStatus())) - .build(); - } - - // --- TableStatsRow -> Table --- - - public Table toTable(TableStatsRow row) { - if (row == null) { - return null; - } - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - // table_stats holds only the snapshot — deltas live on the history table. - .stats(joinStats(row.getSnapshot(), null)) - .updatedAt(row.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistoryRow -> TableStatsHistory --- - - public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { - if (row == null) { - return null; - } - return TableStatsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) - .recordedAt(row.getRecordedAt()) - .build(); - } - - // --- TableStats payload <-> (snapshot, delta) --- - - /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ - public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { - if (dbSnapshot == null && dbDelta == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(dbSnapshot)) - .delta(toModelDelta(dbDelta)) - .build(); - } - - /** Project the internal-model {@link TableStats#getSnapshot()} side. */ - public SnapshotMetrics toDbSnapshot(TableStats modelStats) { - return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); - } - - /** Project the internal-model {@link TableStats#getDelta()} side. 
*/ - public CommitDeltaMetrics toDbDelta(TableStats modelStats) { - return modelStats == null ? null : toDbDelta(modelStats.getDelta()); - } - - public TableStatsHistoryRow toStatsHistoryRow( - String id, - String tableUuid, - String databaseName, - String tableName, - TableStats stats, - java.time.Instant recordedAt) { - return TableStatsHistoryRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .snapshot(toDbSnapshot(stats)) - .delta(toDbDelta(stats)) - .recordedAt(recordedAt) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.db.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( - OperationStatus v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); - } - - public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { - return v == null ? 
null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); - } - - // --- inner-payload field copies --- - - private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { - if (v == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { - if (v == null) { - return null; - } - return SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { - if (v == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } - - private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { - if (v == null) { - return null; - } - return CommitDeltaMetrics.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } -} From af23d5ef63ff1e44a483392e6a364c507d4cae34 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:02:51 -0700 Subject: [PATCH 41/55] fix(optimizer): make TableStats self-describing; route DTO conversion to TableStats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableStats now carries its own identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt) alongside the snapshot + delta payload. 
Consumers no longer need an outer wrapper to know which table the stats belong to. api.TableStatsDto.toModel() and api.UpsertTableStatsRequest.toModel() now return model.TableStats (was model.Table). The two types only happened to have the same shape — semantically a DTO for stats is stats, not a table. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? 
null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. 
+ * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..906d01669 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) From 3864e4257d0476333cbd1d78f87207dc1c46b16e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:04:24 -0700 Subject: [PATCH 42/55] chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1 Enriches model.TableStats with identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt), and reroutes the api DTOs' toModel/fromModel pair to model.TableStats. opt-1's existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. 
{@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 212390af9..eb11c9d25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? 
null : snapshot.toDb(); From a6045b5534b39b14299b4eb36a61c3f872ef3ab6 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:06:06 -0700 Subject: [PATCH 43/55] =?UTF-8?q?feat(optimizer):=20add=20TableStats?= =?UTF-8?q?=E2=86=94TableStatsRow=20conversion=20on=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TableStats.toRow() / fromRow() let the service operate purely on the self-describing model.TableStats type instead of going through Table. Existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved for the history path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableStats.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index eb11c9d25..847f5a00e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -47,6 +47,38 @@ public class TableStats { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + /** + * Project to the current-state {@code table_stats} row. Snapshot only; deltas live on history. + */ + public com.linkedin.openhouse.optimizer.db.TableStatsRow toRow() { + return com.linkedin.openhouse.optimizer.db.TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(snapshot == null ? null : snapshot.toDb()) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .updatedAt(updatedAt) + .build(); + } + + /** + * Build a {@link TableStats} from a current-state DB row. {@link #delta} is left {@code null}. 
+ */ + public static TableStats fromRow(com.linkedin.openhouse.optimizer.db.TableStatsRow row) { + if (row == null) { + return null; + } + return TableStats.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .snapshot(SnapshotMetrics.fromDb(row.getSnapshot())) + .updatedAt(row.getUpdatedAt()) + .build(); + } + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? null : snapshot.toDb(); From 3aebf64b743fb88b2d92a7d623ed70b5dbdee981 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:32:00 -0700 Subject: [PATCH 44/55] chore(optimizer): enable toBuilder on model.Table and model.TableOperationsHistory Moved down from opt-2. The service-layer code (opt-2) uses .toBuilder() on both types; the lombok annotation that enables it belongs on the PR that owns model/. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..089a52982 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -17,7 +17,7 @@ * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..c8950ee26 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -13,7 +13,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From bf30f86e18a8f53f185b2c00fb4b0880847a976d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:33:19 -0700 Subject: [PATCH 45/55] chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index 659dd18da..149128f44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -18,7 +18,7 @@ * types know nothing about model/ or api/. 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 42a48479a..8cbfb6ff7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -14,7 +14,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From b6c7f42774a61214cdabe6d01384b89c685cda35 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:32:28 -0700 Subject: [PATCH 46/55] refactor(optimizer): drop fileCount enrichment from model.TableOperation TableOperation becomes a pure operation record. Consumers (scheduler) look up TableStats at the point they need it, rather than carrying enrichment data on the model type. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableOperation.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..fe91c38d0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -15,10 +15,6 @@ *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). - * - *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., - * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not - * carry it. */ @Data @Builder @@ -50,12 +46,6 @@ public class TableOperation { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; - /** - * Number of current data files on the table at evaluation time. Non-persisted enrichment; - * populated by consumers that need it. Null when not enriched. - */ - private Long fileCount; - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() From 2b06c92e0cb3f5eaf0ab8f205dcb141eb9c47650 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 14:44:35 -0700 Subject: [PATCH 47/55] feat(repo): add findClaimedIds for transactional batch-claim verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit markSchedulingBatch returns only a count of rows transitioned; callers that need to know *which* rows they own must re-query. findClaimedIds takes the same id list + scheduledAt watermark passed to the UPDATE and returns the subset whose SCHEDULING transition matches that watermark — i.e. the rows this caller actually claimed in this call. Used by the scheduler to subset its bin to actually-claimed operations before submitting the Spark job; without this the scheduler can launch a job for ids another instance already owns and then incorrectly mark all of them SCHEDULED. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationsRepository.java | 19 +++++- .../TableOperationsRepositoryTest.java | 58 +++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 8baddfe42..513006bf6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -33,8 +33,9 @@ List find( /** * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; callers must - * re-query if they need the precise list. + * rows transitioned. Rows already claimed by another instance are skipped silently; pair this + * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get + * the precise list of rows this caller now owns. */ @Modifying @Query( @@ -46,6 +47,20 @@ List find( int markSchedulingBatch( @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** + * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code + * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine + * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer + * exist). 
+ */ + @Query( + "SELECT r.id FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " + + "AND r.scheduledAt = :scheduledAt") + List findClaimedIds( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in * SCHEDULING. Returns the number of rows transitioned. diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 44a03ba9e..bfe3fc437 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -105,6 +105,64 @@ void find_byStatus() { assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } + @Test + void findClaimedIds_returnsOnlyClaimedSubset() { + String idA = UUID.randomUUID().toString(); + String idB = UUID.randomUUID().toString(); + String idC = UUID.randomUUID().toString(); + repository.save(pending(idA)); + repository.save(pending(idB)); + // idC is already SCHEDULING with a different scheduledAt — must NOT appear. 
+ repository.save( + TableOperationsRow.builder() + .id(idC) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_c") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULING) + .createdAt(Instant.now()) + .scheduledAt(Instant.now().minusSeconds(60)) + .build()); + + Instant now = Instant.now(); + repository.markSchedulingBatch(List.of(idA, idB, idC), now); + + List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); + assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + } + + @Test + void findClaimedIds_emptyWhenNothingClaimed() { + String id = UUID.randomUUID().toString(); + repository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_x") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .build()); + + List claimed = repository.findClaimedIds(List.of(id), Instant.now()); + assertThat(claimed).isEmpty(); + } + + private TableOperationsRow pending(String id) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_" + id) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + @Test void find_byDatabaseAndTable() { repository.save( From 437a0ed84a2fa7a53ea827b241404f60d20ac230 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:35:27 -0700 Subject: [PATCH 48/55] refactor(optimizer): add Dto suffix to all api/model classes (PR #527 review) Per @abhisheknath2011 review comment 3262776356: > "We could change all the internal model add Dto suffix something like > TableOperationsDto. This aligns with the existing services codebase." 
Renames (suffix added): - CompleteOperationRequest -> CompleteOperationRequestDto - UpsertTableStatsRequest -> UpsertTableStatsRequestDto - OperationType (enum) -> OperationTypeDto - OperationStatus (enum) -> OperationStatusDto - HistoryStatus (enum) -> HistoryStatusDto - TableStats (inner payload) -> TableStatsPayloadDto - TableStats.SnapshotMetrics -> TableStatsPayloadDto.SnapshotMetricsDto - TableStats.CommitDelta -> TableStatsPayloadDto.CommitDeltaDto Cross-reference updates inside api/model. Internal model layer (services/optimizer/.../model/) is intentionally unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- ....java => CompleteOperationRequestDto.java} | 6 ++--- ...storyStatus.java => HistoryStatusDto.java} | 6 ++--- ...ionStatus.java => OperationStatusDto.java} | 6 ++--- ...erationType.java => OperationTypeDto.java} | 6 ++--- .../api/model/TableOperationsDto.java | 8 +++--- .../api/model/TableOperationsHistoryDto.java | 8 +++--- .../optimizer/api/model/TableStatsDto.java | 8 +++--- .../api/model/TableStatsHistoryDto.java | 4 +-- ...leStats.java => TableStatsPayloadDto.java} | 27 ++++++++++--------- ...t.java => UpsertTableStatsRequestDto.java} | 4 +-- 10 files changed, 42 insertions(+), 41 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{CompleteOperationRequest.java => CompleteOperationRequestDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{HistoryStatus.java => HistoryStatusDto.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationStatus.java => OperationStatusDto.java} (87%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationType.java => OperationTypeDto.java} (72%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{TableStats.java => TableStatsPayloadDto.java} (86%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{UpsertTableStatsRequest.java => UpsertTableStatsRequestDto.java} (95%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java index 0add634b5..0db7a8a37 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class CompleteOperationRequestDto { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequest { private String tableName; /** Debug echo: operation type. 
*/ - private OperationType operationType; + private OperationTypeDto operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java index 0c9ff95da..5a4421332 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -15,7 +15,7 @@ public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); + public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java similarity index 87% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java index 300c28263..89fa9f1b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ PENDING, @@ -25,8 +25,8 @@ public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatus fromModel( + public static OperationStatusDto fromModel( com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); + return v == null ? 
null : OperationStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java similarity index 72% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java index 5f325e712..210010eb0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; @@ -11,7 +11,7 @@ public com.linkedin.openhouse.optimizer.model.OperationType toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); + public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? 
null : OperationTypeDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index db8ef1039..880fe7926 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationType operationType; + private OperationTypeDto operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatus status; + private OperationStatusDto status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -65,8 +65,8 @@ public static TableOperationsDto fromModel(TableOperation op) { .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationType.fromModel(op.getOperationType())) - .status(OperationStatus.fromModel(op.getStatus())) + .operationType(OperationTypeDto.fromModel(op.getOperationType())) + .status(OperationStatusDto.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 935435040..652a58b3f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,13 +27,13 @@ public 
class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Convert to the internal-model counterpart. */ public TableOperationsHistory toModel() { @@ -58,9 +58,9 @@ public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationType.fromModel(h.getOperationType())) + .operationType(OperationTypeDto.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatus.fromModel(h.getStatus())) + .status(HistoryStatusDto.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 244050b04..6852081ab 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; @@ -57,9 +57,9 @@ public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.Tab .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + TableStatsPayloadDto.builder() + .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index b5f971bbf..bac3782ff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -27,7 +27,7 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** When this history row was recorded. 
*/ private Instant recordedAt; @@ -54,7 +54,7 @@ public static TableStatsHistoryDto fromModel(TableStatsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStats.fromModel(h.getStats())) + .stats(TableStatsPayloadDto.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java similarity index 86% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java index c75d21d75..692cb7247 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java @@ -17,13 +17,13 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsPayloadDto { /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; + private SnapshotMetricsDto snapshot; /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; + private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { @@ -34,13 +34,14 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build the api-layer payload from the internal-model counterpart. 
*/ - public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayloadDto fromModel( + com.linkedin.openhouse.optimizer.model.TableStats m) { if (m == null) { return null; } - return TableStats.builder() - .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(CommitDelta.fromModel(m.getDelta())) + return TableStatsPayloadDto.builder() + .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); } @@ -50,7 +51,7 @@ public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableS @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { + public static class SnapshotMetricsDto { /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; @@ -75,12 +76,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel } /** Build the api-layer inner object from the internal-model counterpart. */ - public static SnapshotMetrics fromModel( + public static SnapshotMetricsDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { if (m == null) { return null; } - return SnapshotMetrics.builder() + return SnapshotMetricsDto.builder() .tableVersion(m.getTableVersion()) .tableLocation(m.getTableLocation()) .tableSizeBytes(m.getTableSizeBytes()) @@ -95,7 +96,7 @@ public static SnapshotMetrics fromModel( @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { + public static class CommitDeltaDto { /** Number of data files this commit added to the table. */ private Long numFilesAdded; @@ -120,12 +121,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { } /** Build the api-layer inner object from the internal-model counterpart. 
*/ - public static CommitDelta fromModel( + public static CommitDeltaDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { if (m == null) { return null; } - return CommitDelta.builder() + return CommitDeltaDto.builder() .numFilesAdded(m.getNumFilesAdded()) .numFilesDeleted(m.getNumFilesDeleted()) .addedSizeBytes(m.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java index 08b42050f..75753fa69 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequest { +public class UpsertTableStatsRequestDto { /** Denormalized database name for display. */ private String databaseName; @@ -26,7 +26,7 @@ public class UpsertTableStatsRequest { private String tableName; /** Combined snapshot + delta stats payload from this commit. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; From 4f98c228b6ea661291fb924ed870d41e82757159 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:56:57 -0700 Subject: [PATCH 49/55] refactor(optimizer): rename api.model package to api.spec (PR #527 review) Per @abhisheknath2011 review comment 3262769497: > "Can we change the client side API to api.spec instead of api.model? 
> This also aligns with existing services." Mechanical package rename. The 10 api wire types move from services/optimizer/.../api/model/ to services/optimizer/.../api/spec/. No type or signature changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/{model => spec}/CompleteOperationRequestDto.java | 2 +- .../optimizer/api/{model => spec}/HistoryStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationTypeDto.java | 2 +- .../optimizer/api/{model => spec}/TableOperationsDto.java | 2 +- .../api/{model => spec}/TableOperationsHistoryDto.java | 2 +- .../openhouse/optimizer/api/{model => spec}/TableStatsDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsHistoryDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsPayloadDto.java | 2 +- .../api/{model => spec}/UpsertTableStatsRequestDto.java | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/CompleteOperationRequestDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/HistoryStatusDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationStatusDto.java (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationTypeDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsHistoryDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsHistoryDto.java (96%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsPayloadDto.java (98%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/UpsertTableStatsRequestDto.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java index 0db7a8a37..9dca54a8e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java index 5a4421332..034be4cf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** 
Terminal states for a completed Spark maintenance job. */ public enum HistoryStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java index 89fa9f1b0..f02ee2815 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ public enum OperationStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java index 210010eb0..4e057b232 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. 
*/ public enum OperationTypeDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java index 880fe7926..496f59f42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java index 652a58b3f..8b508bf36 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java index 6852081ab..165ae47dc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.time.Instant; import java.util.Collections; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java index bac3782ff..9e7c44c56 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java similarity index 
98% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java index 692cb7247..761471f91 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java index 75753fa69..3e1fe4764 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.util.Collections; import java.util.Map; From b31decf8a6cb93351ce5fd153b2740f1ea0329e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:51:19 -0700 Subject: [PATCH 50/55] refactor(optimizer): move Dto suffix from api/spec to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reversal of an earlier inconsistency surfaced by abhisheknath2011 in the PR #527 
review thread on api/spec/HistoryStatusDto.java. The api wire types are the canonical contract; they should carry the canonical name. The internal-model types are transfer objects between layers and now carry the Dto suffix. api/spec/ — Dto stripped from class + filename (10 files): CompleteOperationRequestDto -> CompleteOperationRequest HistoryStatusDto -> HistoryStatus OperationStatusDto -> OperationStatus OperationTypeDto -> OperationType TableOperationsDto -> TableOperations TableOperationsHistoryDto -> TableOperationsHistory TableStatsDto -> TableStats TableStatsHistoryDto -> TableStatsHistory TableStatsPayloadDto -> TableStatsPayload UpsertTableStatsRequestDto -> UpsertTableStatsRequest model/ — Dto added to class + filename (8 files): HistoryStatus -> HistoryStatusDto OperationStatus -> OperationStatusDto OperationType -> OperationTypeDto Table -> TableDto TableOperation -> TableOperationDto TableOperationsHistory -> TableOperationsHistoryDto TableStats -> TableStatsDto TableStatsHistory -> TableStatsHistoryDto Both renames land on opt-0 because opt-0 owns api/spec/ and model/. Cascade up the stack in follow-up commits. Out of scope here: HistoryStatus enum value additions (CANCELED, QUEUED) also raised in the same review thread; separate semantic change. 
Co-Authored-By: Claude Opus 4.7 --- ...Dto.java => CompleteOperationRequest.java} | 6 ++--- ...storyStatusDto.java => HistoryStatus.java} | 10 ++++---- ...ionStatusDto.java => OperationStatus.java} | 12 +++++----- ...erationTypeDto.java => OperationType.java} | 10 ++++---- ...perationsDto.java => TableOperations.java} | 20 ++++++++-------- ...ryDto.java => TableOperationsHistory.java} | 20 ++++++++-------- .../{TableStatsDto.java => TableStats.java} | 22 +++++++++-------- ...HistoryDto.java => TableStatsHistory.java} | 16 ++++++------- ...PayloadDto.java => TableStatsPayload.java} | 24 +++++++++---------- ...tDto.java => UpsertTableStatsRequest.java} | 16 +++++++------ ...storyStatus.java => HistoryStatusDto.java} | 2 +- ...ionStatus.java => OperationStatusDto.java} | 2 +- ...erationType.java => OperationTypeDto.java} | 2 +- .../model/{Table.java => TableDto.java} | 6 ++--- ...eOperation.java => TableOperationDto.java} | 16 ++++++------- ...ry.java => TableOperationsHistoryDto.java} | 8 +++---- .../{TableStats.java => TableStatsDto.java} | 2 +- ...History.java => TableStatsHistoryDto.java} | 4 ++-- 18 files changed, 101 insertions(+), 97 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequestDto.java => CompleteOperationRequest.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{HistoryStatusDto.java => HistoryStatus.java} (52%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationStatusDto.java => OperationStatus.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationTypeDto.java => OperationType.java} (50%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsDto.java => TableOperations.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsHistoryDto.java => 
TableOperationsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsDto.java => TableStats.java} (70%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsHistoryDto.java => TableStatsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsPayloadDto.java => TableStatsPayload.java} (81%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{UpsertTableStatsRequestDto.java => UpsertTableStatsRequest.java} (71%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{HistoryStatus.java => HistoryStatusDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationStatus.java => OperationStatusDto.java} (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationType.java => OperationTypeDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{Table.java => TableDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperation.java => TableOperationDto.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperationsHistory.java => TableOperationsHistoryDto.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStats.java => TableStatsDto.java} (99%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStatsHistory.java => TableStatsHistoryDto.java} (94%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java similarity index 92% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java index 9dca54a8e..15112882d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequestDto { +public class CompleteOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequestDto { private String tableName; /** Debug echo: operation type. */ - private OperationTypeDto operationType; + private OperationType operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java similarity index 52% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java index 034be4cf2..1d799818f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. 
*/ -public enum HistoryStatusDto { +public enum HistoryStatus { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -10,12 +10,12 @@ public enum HistoryStatusDto { FAILED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { - return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.HistoryStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatusDto.valueOf(v.name()); + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatusDto v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java index f02ee2815..b1cbe42b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatusDto { +public enum OperationStatus { /** Recommended by the Analyzer but not yet claimed by the Scheduler. 
*/ PENDING, @@ -20,13 +20,13 @@ public enum OperationStatusDto { CANCELED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { - return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatusDto fromModel( - com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatusDto.valueOf(v.name()); + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatusDto v) { + return v == null ? null : OperationStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java similarity index 50% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java index 4e057b232..ea6d2797c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java @@ -1,17 +1,17 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationTypeDto { +public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.OperationType toModel() { - return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationTypeDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationTypeDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationTypeDto.valueOf(v.name()); + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationTypeDto v) { + return v == null ? null : OperationType.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 496f59f42..60f2c3dd8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsDto { +public class TableOperations { /** Client-generated UUID identifying this specific operation recommendation. 
*/ private String id; @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationTypeDto operationType; + private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatusDto status; + private OperationStatus status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -42,8 +42,8 @@ public class TableOperationsDto { private String jobId; /** Convert to the internal-model counterpart. */ - public TableOperation toModel() { - return TableOperation.builder() + public TableOperationDto toModel() { + return TableOperationDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -56,17 +56,17 @@ public TableOperation toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableOperationsDto fromModel(TableOperation op) { + public static TableOperations fromModel(TableOperationDto op) { if (op == null) { return null; } - return TableOperationsDto.builder() + return TableOperations.builder() .id(op.getId()) .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationTypeDto.fromModel(op.getOperationType())) - .status(OperationStatusDto.fromModel(op.getStatus())) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java index 8b508bf36..7a000f840 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistoryDto { +public class TableOperationsHistory { /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ private String id; @@ -27,17 +27,17 @@ public class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationTypeDto operationType; + private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Convert to the internal-model counterpart. */ - public TableOperationsHistory toModel() { - return TableOperationsHistory.builder() + public TableOperationsHistoryDto toModel() { + return TableOperationsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -49,18 +49,18 @@ public TableOperationsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + public static TableOperationsHistory fromModel(TableOperationsHistoryDto h) { if (h == null) { return null; } - return TableOperationsHistoryDto.builder() + return TableOperationsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationTypeDto.fromModel(h.getOperationType())) + .operationType(OperationType.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatusDto.fromModel(h.getStatus())) + .status(HistoryStatus.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java index 165ae47dc..41f44f763 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java @@ -13,7 +13,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsDto { +public class TableStats { /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; @@ -34,9 +34,11 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) @@ -48,18 +50,18 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsDto.builder() + return TableStats.builder() .tableUuid(m.getTableUuid()) .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStatsPayloadDto.builder() - .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) - .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) + TableStatsPayload.builder() + .snapshot(TableStatsPayload.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayload.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java index 9e7c44c56..5508aca27 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistoryDto { +public class TableStatsHistory { /** UUID primary key set by the caller. */ private String id; @@ -27,14 +27,14 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** When this history row was recorded. */ private Instant recordedAt; /** Convert to the internal-model counterpart. */ - public TableStatsHistory toModel() { - return TableStatsHistory.builder() + public TableStatsHistoryDto toModel() { + return TableStatsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -45,16 +45,16 @@ public TableStatsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + public static TableStatsHistory fromModel(TableStatsHistoryDto h) { if (h == null) { return null; } - return TableStatsHistoryDto.builder() + return TableStatsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStatsPayloadDto.fromModel(h.getStats())) + .stats(TableStatsPayload.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java similarity index 81% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java index 761471f91..c347bf385 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java @@ -17,7 +17,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStatsPayloadDto { +public class TableStatsPayload { /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetricsDto snapshot; @@ -26,20 +26,20 @@ public class TableStatsPayloadDto { private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.builder() .snapshot(snapshot == null ? null : snapshot.toModel()) .delta(delta == null ? 
null : delta.toModel()) .build(); } /** Build the api-layer payload from the internal-model counterpart. */ - public static TableStatsPayloadDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayload fromModel( + com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsPayloadDto.builder() + return TableStatsPayload.builder() .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); @@ -66,8 +66,8 @@ public static class SnapshotMetricsDto { private Long numCurrentFiles; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics.builder() .tableVersion(tableVersion) .tableLocation(tableLocation) .tableSizeBytes(tableSizeBytes) @@ -77,7 +77,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel /** Build the api-layer inner object from the internal-model counterpart. */ public static SnapshotMetricsDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics m) { if (m == null) { return null; } @@ -111,8 +111,8 @@ public static class CommitDeltaDto { private Long deletedSizeBytes; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta.builder() .numFilesAdded(numFilesAdded) .numFilesDeleted(numFilesDeleted) .addedSizeBytes(addedSizeBytes) @@ -122,7 +122,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { /** Build the api-layer inner object from the internal-model counterpart. */ public static CommitDeltaDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta m) { if (m == null) { return null; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java similarity index 71% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 3e1fe4764..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequestDto { +public class UpsertTableStatsRequest { /** Denormalized database name for display. */ private String databaseName; @@ -26,19 +26,21 @@ public class UpsertTableStatsRequestDto { private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; /** - * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by - * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStatsDto} described + * by this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is * left {@code null}; the service stamps it server-side at write time. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java index 97b8e2992..463c62605 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The operation completed successfully. */ SUCCESS, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java index f284fedaf..b766f7dbe 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java index 8f4fe35a8..39b299806 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java @@ -5,7 +5,7 @@ * separate from the wire-API and DB representations so the internal model can evolve its set of * supported operations without churning either boundary. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java index 089a52982..408bc4fc7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java @@ -10,7 +10,7 @@ /** * An OpenHouse table enriched with stats and properties, built by combining data sources. 
Consumed - * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * by the analyzer (decides whether to produce a {@link TableOperationDto}) and the scheduler (reads * stats for bin-packing). * *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link @@ -20,7 +20,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class Table { +public class TableDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; @@ -35,7 +35,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ - private TableStats stats; + private TableStatsDto stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index fe91c38d0..8809a1b62 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -20,7 +20,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperation { +public class TableOperationDto { /** Unique operation ID (UUID). */ private String id; @@ -35,10 +35,10 @@ public class TableOperation { private String tableName; /** Operation type. */ - private OperationType operationType; + private OperationTypeDto operationType; /** Current lifecycle status. */ - private OperationStatus status; + private OperationStatusDto status; /** When this operation record was created. 
*/ private Instant createdAt; @@ -47,21 +47,21 @@ public class TableOperation { private Instant scheduledAt; /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() + public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { + return TableOperationDto.builder() .id(UUID.randomUUID().toString()) .tableUuid(table.getTableUuid()) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) .operationType(operationType) - .status(OperationStatus.PENDING) + .status(OperationStatusDto.PENDING) .createdAt(Instant.now()) .build(); } /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator byCreatedAt = + public static TableOperationDto mostRecent(TableOperationDto a, TableOperationDto b) { + Comparator byCreatedAt = Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java index c8950ee26..e05bb641e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java @@ -16,7 +16,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistory { +public class TableOperationsHistoryDto { /** Same UUID as the originating live-operations row. */ private String id; @@ -31,11 +31,11 @@ public class TableOperationsHistory { private String tableName; /** Operation type for this completed run. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; - /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ - private HistoryStatus status; + /** Terminal outcome: {@link HistoryStatusDto#SUCCESS} or {@link HistoryStatusDto#FAILED}. 
*/ + private HistoryStatusDto status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java similarity index 99% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java index 906d01669..d142dcc8b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java @@ -24,7 +24,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java similarity index 94% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java index 53bb54d1e..5579c95ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java @@ -18,7 +18,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistory { +public class TableStatsHistoryDto { /** UUID primary key — set by the caller, not generated server-side. */ private String id; @@ -33,7 +33,7 @@ public class TableStatsHistory { private String tableName; /** Snapshot + delta for this commit event. 
*/ - private TableStats stats; + private TableStatsDto stats; /** When this history row was recorded. */ private Instant recordedAt; From 4e86569ce2e4327665b0d8885276c6b2e048612a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:16:10 -0700 Subject: [PATCH 51/55] feat(optimizer): propagate jobId through model + api conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableOperationDto grows a jobId field; api.TableOperations conversions copy it across the api ↔ model boundary. The api DTO already had the field; the model side was missing it. Relocated from opt-5 to its proper owner per the model-layer rule. Model ↔ db plumbing for the same field lands on opt-1 in a follow-up. Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/api/spec/TableOperations.java | 2 ++ .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 60f2c3dd8..0bca95734 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -52,6 +52,7 @@ public TableOperationDto toModel() { .status(status == null ? 
null : status.toModel()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -69,6 +70,7 @@ public static TableOperations fromModel(TableOperationDto op) { .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) + .jobId(op.getJobId()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index 8809a1b62..4cac14187 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -46,6 +46,9 @@ public class TableOperationDto { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; + /** Job ID returned by the Jobs Service after the scheduler submitted; null until SCHEDULED. */ + private String jobId; + /** Create a new PENDING operation for the given table and operation type. */ public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { return TableOperationDto.builder() From efcceeaa9d4656fe6ec2028c72a60fe7d92f59a3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:17:06 -0700 Subject: [PATCH 52/55] =?UTF-8?q?feat(optimizer):=20propagate=20jobId=20th?= =?UTF-8?q?rough=20model=20=E2=86=94=20db=20conversions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to the opt-0 jobId field addition: now that model.TableOperationDto carries jobId, wire it through toRow/fromRow so the db row's job_id column round-trips through the model layer. Relocated from opt-5. 
Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index c39a71ecd..18d57ce66 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -80,6 +80,7 @@ public TableOperationsRow toRow() { .status(status == null ? null : status.toDb()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -97,6 +98,7 @@ public static TableOperationDto fromRow(TableOperationsRow row) { .status(OperationStatusDto.fromDb(row.getStatus())) .createdAt(row.getCreatedAt()) .scheduledAt(row.getScheduledAt()) + .jobId(row.getJobId()) .build(); } } From 1fe71f043260d2c5b57c6556cb69ea051f5fafbe Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:25:48 -0700 Subject: [PATCH 53/55] =?UTF-8?q?refactor(optimizer):=20rename=20CompleteO?= =?UTF-8?q?perationRequest=20=E2=86=92=20UpdateOperationRequest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename only. The HistoryStatus enum (SUCCESS/FAILED) and the once-terminal semantics are unchanged; the endpoint's behavior is the same. Future broadening (CANCELED/QUEUED, idempotency, mid-lifecycle status changes) is a separate concern. Method names + URL path will follow on opt-2; Spark-app caller + docs follow on opt-5. 
Co-Authored-By: Claude Opus 4.7 --- ...nRequest.java => UpdateOperationRequest.java} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequest.java => UpdateOperationRequest.java} (70%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index 15112882d..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -6,26 +6,26 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/complete}. + * Request body for {@code POST /v1/table-operations/update}. * - *

Reports the outcome of a single completed operation. The service looks up the operation row by + *

Reports the outcome of a single operation update. The service looks up the operation row by * {@link #operationId} and writes a history entry for it. * *

A single Spark job typically processes N tables and yields N independent (status) outcomes — - * one per operation. Callers issue one complete request per operation; the service does not - * bulk-complete by job. + * one per operation. Callers issue one update request per operation; the service does not + * bulk-update by job. * *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link * #operationType}) are debug-only echo information. The server does not key off them; they are - * preserved on log lines and traces so an operator looking at a failing complete call can see which - * (db, table, operation) the caller believed it was completing without joining back to the - * operation row. + * preserved on log lines and traces so an operator looking at a failing update call can see which + * (db, table, operation) the caller believed it was updating without joining back to the operation + * row. */ @Data @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class UpdateOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; From d65b511d472ff27f08ad12e86d393cf877457c51 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:19:47 -0700 Subject: [PATCH 54/55] refactor(optimizer-repo): unify find/updateBatch with Optional params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Repo public API now: - find(...) with Optional filters + required Pageable, on all four repos - updateBatch(ids, fromStatus, toStatus, Optional scheduledAt, Optional jobId) — replaces markSchedulingBatch, markScheduledBatch, markPendingBatch - cancel(ids) — replaces cancelDuplicatePendingBatch; deletes by-id with a defensive PENDING-only gate - findLatest(opType, Pageable) — was findLatestPerTable - history.find(tableUuid, Pageable) — was findByTableUuidOrderByCompletedAtDesc Side-effect columns on updateBatch use COALESCE with Optional.empty() → leave-unchanged. scheduledAt is not cleared on SCHEDULING → PENDING revert; status is the source of truth and the watermark is overwritten on the next claim. @Modifying queries get flushAutomatically + clearAutomatically so the L1 cache reflects the change immediately (caught by the unit tests). 
Spring Data @Query can't share an "IS NULL OR IN :list" pattern (Hibernate expands the list inline and the IS NULL check turns ungrammatical). The find path uses two internal queries dispatched by the default method — one with the ids predicate, one without. Callers (service, analyzer, scheduler) update on opt-2..opt-4 in follow-up commits. Co-Authored-By: Claude Opus 4.7 --- .../TableOperationsHistoryRepository.java | 15 +- .../repository/TableOperationsRepository.java | 181 +++++++---- .../TableStatsHistoryRepository.java | 19 +- .../repository/TableStatsRepository.java | 37 ++- .../TableOperationsHistoryRepositoryTest.java | 8 +- .../TableOperationsRepositoryTest.java | 307 ++++++++++++------ .../TableStatsHistoryRepositoryTest.java | 14 +- .../repository/TableStatsRepositoryTest.java | 17 +- 8 files changed, 396 insertions(+), 202 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 5faf349e3..6c08f844a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -13,11 +13,14 @@ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer - * {@code getHistory} endpoint. + * Return history rows for a single {@code tableUuid}, newest first. {@code pageable} is required; + * callers pick the row cap (default limit lives in {@code optimizer.repo.default-limit}). 
*/ - List findByTableUuidOrderByCompletedAtDesc( - String tableUuid, Pageable pageable); + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.completedAt DESC") + List find(@Param("tableUuid") String tableUuid, Pageable pageable); /** * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a @@ -37,6 +40,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable( - @Param("operationType") OperationType operationType); + List findLatest( + @Param("operationType") OperationType operationType, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 513006bf6..e0df2cd21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -5,6 +5,8 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; @@ -14,90 +16,131 @@ public interface TableOperationsRepository extends JpaRepository { /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Find operation rows matching the given filters. 
Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + Optional operationType, + Optional status, + Optional tableUuid, + Optional databaseName, + Optional tableName, + Optional scheduledAt, + Optional> ids, + Pageable pageable) { + // List parameters can't share an :ids IS NULL pattern with the IN clause — + // Hibernate expands the list inline and the IS NULL check turns ungrammatical. + // Two internal queries; dispatch by presence. + if (ids.isPresent()) { + return findInternalWithIds( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + ids.get(), + pageable); + } + return findInternal( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + pageable); + } + + /** + * Batch CAS: transition rows from {@code fromStatus} to {@code toStatus} for every id in {@code + * ids} that is still in {@code fromStatus}. Rows in a different status are skipped silently. + * Returns the number of rows transitioned. + * + *

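Side-effect columns use COALESCE — {@link Optional#empty()} means "leave unchanged". The + * underlying transitions are: + * + * <ul> + * <li>{@code PENDING} → {@code SCHEDULING}: pass {@code scheduledAt} to stamp the claim watermark. + * <li>{@code SCHEDULING} → {@code SCHEDULED}: pass {@code jobId} to record the submitted job. + * <li>{@code SCHEDULING} → {@code PENDING}: pass neither; {@code scheduledAt} is left as-is and is overwritten on the next claim. + * </ul>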

+ */ + default int updateBatch( + List ids, + OperationStatus fromStatus, + OperationStatus toStatus, + Optional scheduledAt, + Optional jobId) { + return updateBatchInternal( + ids, fromStatus, toStatus, scheduledAt.orElse(null), jobId.orElse(null)); + } + + /** + * Delete the specified rows, but only if they are still {@code PENDING}. The status gate is + * defensive — never drop a row another instance has claimed. Returns the number of rows actually + * removed. + */ + @Modifying(flushAutomatically = true, clearAutomatically = true) + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int cancel(@Param("ids") List ids); + + // ---- Internals. Use the Optional-typed default methods above. ---- + @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt)") + List findInternal( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName); - - /** - * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; pair this - * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get - * the precise list of rows this caller now owns. 
- */ - @Modifying - @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " r.scheduledAt = :scheduledAt " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") - int markSchedulingBatch( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); - - /** - * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code - * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine - * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer - * exist). - */ - @Query( - "SELECT r.id FROM TableOperationsRow r " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " - + "AND r.scheduledAt = :scheduledAt") - List findClaimedIds( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + Pageable pageable); - /** - * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in - * SCHEDULING. Returns the number of rows transitioned. 
- */ - @Modifying @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt) " + + "AND r.id IN :ids") + List findInternalWithIds( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + @Param("ids") List ids, + Pageable pageable); - /** - * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the - * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns - * the number of rows reverted. 
- */ - @Modifying + @Modifying(flushAutomatically = true, clearAutomatically = true) @Query( "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL " + + "SET r.status = :toStatus, " + + " r.scheduledAt = COALESCE(:scheduledAt, r.scheduledAt), " + + " r.jobId = COALESCE(:jobId, r.jobId) " + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markPendingBatch(@Param("ids") List ids); - - /** - * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in - * {@code keepIds}. Used by the scheduler to deduplicate before claiming. - */ - @Modifying - @Query( - "DELETE FROM TableOperationsRow r " - + "WHERE r.operationType = :operationType " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " - + "AND r.id NOT IN :keepIds") - int cancelDuplicatePendingBatch( - @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); + + "AND r.status = :fromStatus") + int updateBatchInternal( + @Param("ids") List ids, + @Param("fromStatus") OperationStatus fromStatus, + @Param("toStatus") OperationStatus toStatus, + @Param("scheduledAt") Instant scheduledAt, + @Param("jobId") String jobId); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 6f9595275..9b603f265 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; +import java.util.Optional; import 
org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -12,18 +13,22 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. - * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results + * Return history rows for a table, newest first. {@code since} is optional ({@link + * Optional#empty()} to skip the time filter). {@code pageable} is required; callers pick the row + * cap (default limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + String tableUuid, Optional since, Pageable pageable) { + return findInternal(tableUuid, since.orElse(null), pageable); + } + + // ---- Internals. Use the Optional-typed default method above. 
---- + @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List find( + List findInternal( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index dbf1de0ae..1123c0e7a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -2,6 +2,8 @@ import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -10,18 +12,18 @@ public interface TableStatsRepository extends JpaRepository { /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Return stats rows matching the given filters. Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). 
*/ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + default List find( + Optional databaseName, + Optional tableName, + Optional tableUuid, + Pageable pageable) { + return findInternal( + databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null), pageable); + } /** * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the @@ -30,4 +32,17 @@ List find( */ @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") List findDistinctDatabaseNames(); + + // ---- Internals. Use the Optional-typed default methods above. ---- + + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findInternal( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + Pageable pageable); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 706ecd877..9f1de0c0c 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -52,8 +52,7 @@ void findByTableUuid_returnsRowsNewestFirst() { .status(HistoryStatus.FAILED) .build()); - List rows = - 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); + List rows = repository.find(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); assertThat(rows.get(0).getId()).isEqualTo(idNewer); @@ -77,8 +76,7 @@ void findByTableUuid_respectsLimit() { .build()); } - List rows = - repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -121,7 +119,7 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); + repository.findLatest(OperationType.ORPHAN_FILES_DELETION, PageRequest.of(0, 10_000)); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index bfe3fc437..8f46af1bf 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -9,9 +9,12 @@ import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -20,24 +23,15 @@ @Transactional class TableOperationsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired 
TableOperationsRepository repository; @Test void saveAndFindById() { String id = UUID.randomUUID().toString(); - TableOperationsRow row = - TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); - - repository.save(row); + repository.save(pendingRow(id, "tbl1")); Optional found = repository.findById(id); assertThat(found).isPresent(); @@ -45,74 +39,103 @@ void saveAndFindById() { } @Test - void find_noParams_returnsAll() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + void find_noFilters_returnsAll() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); - List rows = repository.find(null, null, null, null, null); + List rows = + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(rows).hasSize(2); } @Test void find_byStatus() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - 
.createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULED), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findClaimedIds_returnsOnlyClaimedSubset() { + void find_byDatabaseAndTable() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1", "db1")); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl2", "db2")); + + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db1"), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db2"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + 
Optional.of("db1"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .isEmpty(); + } + + @Test + void find_byScheduledAtAndIds_resolvesClaimedSubset() { String idA = UUID.randomUUID().toString(); String idB = UUID.randomUUID().toString(); String idC = UUID.randomUUID().toString(); - repository.save(pending(idA)); - repository.save(pending(idB)); - // idC is already SCHEDULING with a different scheduledAt — must NOT appear. + repository.save(pendingRow(idA, "tbl_a")); + repository.save(pendingRow(idB, "tbl_b")); + // idC is already SCHEDULING with an older watermark — must NOT appear. repository.save( TableOperationsRow.builder() .id(idC) @@ -126,68 +149,160 @@ void findClaimedIds_returnsOnlyClaimedSubset() { .build()); Instant now = Instant.now(); - repository.markSchedulingBatch(List.of(idA, idB, idC), now); + int transitioned = + repository.updateBatch( + List.of(idA, idB, idC), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(now), + Optional.empty()); + assertThat(transitioned).isEqualTo(2); - List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); - assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + List claimedIds = + repository + .find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of(now), + Optional.of(List.of(idA, idB, idC)), + PAGE) + .stream() + .map(TableOperationsRow::getId) + .collect(Collectors.toList()); + assertThat(claimedIds).containsExactlyInAnyOrder(idA, idB); } @Test - void findClaimedIds_emptyWhenNothingClaimed() { + void updateBatch_schedulingToScheduled_setsJobIdAndPreservesScheduledAt() { String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") - .tableName("tbl_x") + .tableName("tbl1") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) - .scheduledAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - List claimed = repository.findClaimedIds(List.of(id), Instant.now()); - assertThat(claimed).isEmpty(); - } + int updated = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.SCHEDULED, + Optional.empty(), + Optional.of("job-123")); + assertThat(updated).isEqualTo(1); - private TableOperationsRow pending(String id) { - return TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl_" + id) - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(row.getJobId()).isEqualTo("job-123"); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); } @Test - void find_byDatabaseAndTable() { + void updateBatch_schedulingToPending_leavesScheduledAtUntouched() { + // scheduledAt is intentionally NOT cleared on revert. Status is the source of truth; the + // stale watermark gets overwritten on the next PENDING → SCHEDULING transition. 
+ String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) + .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); - assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); - assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); + int reverted = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.PENDING, + Optional.empty(), + Optional.empty()); + assertThat(reverted).isEqualTo(1); + + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); + } + + @Test + void updateBatch_skipsRowsNotInFromStatus() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_a")); + repository.save(scheduledRow(scheduledId, "tbl_b")); + + int transitioned = + repository.updateBatch( + List.of(pendingId, scheduledId), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(Instant.now()), + Optional.empty()); + assertThat(transitioned).isEqualTo(1); + + assertThat(repository.findById(pendingId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULING); + 
assertThat(repository.findById(scheduledId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void cancel_deletesOnlyPendingRows() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_p")); + repository.save(scheduledRow(scheduledId, "tbl_s")); + + int deleted = repository.cancel(List.of(pendingId, scheduledId)); + assertThat(deleted).isEqualTo(1); + + assertThat(repository.findById(pendingId)).isEmpty(); + assertThat(repository.findById(scheduledId)).isPresent(); + } + + // --- helpers --- + + private TableOperationsRow pendingRow(String id, String tableName) { + return pendingRow(id, tableName, "db1"); + } + + private TableOperationsRow pendingRow(String id, String tableName, String databaseName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName(databaseName) + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + private TableOperationsRow scheduledRow(String id, String tableName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("job-" + id) + .build(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 536b72e35..cddec50c9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -8,6 +8,7 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; +import java.util.Optional; import java.util.UUID; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -32,7 +33,8 @@ void saveAndFind() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -49,7 +51,8 @@ void find_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -64,7 +67,8 @@ void find_withSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.of(cutoff), PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -80,8 +84,8 @@ void find_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); + 
assertThat(repository.find(uuid2, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index f9cc28d57..e73ac0cb4 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -11,6 +11,8 @@ import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -19,6 +21,8 @@ @Transactional class TableStatsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired TableStatsRepository repository; @Test @@ -90,7 +94,8 @@ void find_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find(null, null, null)).hasSize(2); + assertThat(repository.find(Optional.empty(), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(2); } @Test @@ -112,7 +117,13 @@ void find_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); + assertThat(repository.find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(1); + assertThat( + repository + .find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE) + .get(0) + .getDatabaseName()) + .isEqualTo("db1"); } } From b69e09a511e684e30dc9a5adb1b8e26951c7190e Mon Sep 17 00:00:00 
2001 From: mkuchenbecker Date: Wed, 20 May 2026 19:47:22 -0700 Subject: [PATCH 55/55] test(optimizer-repo): truncate Instant to micros for CI precision Instant.now() on Linux CI carries nanoseconds; MySQL TIMESTAMP(6) and H2 in MySQL mode store microseconds. The scheduledAt = :scheduledAt predicate in find(...) compared nano-resolution param against micro-resolution stored value and missed. Local (macOS, micro-only) hid the bug. Truncate to ChronoUnit.MICROS at write time in the one repo test that exercises the watermark round-trip. Co-Authored-By: Claude Opus 4.7 --- .../optimizer/repository/TableOperationsRepositoryTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 8f46af1bf..072be5fd9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Optional; import java.util.UUID; @@ -148,7 +149,10 @@ void find_byScheduledAtAndIds_resolvesClaimedSubset() { .scheduledAt(Instant.now().minusSeconds(60)) .build()); - Instant now = Instant.now(); + // Truncate to microseconds — MySQL TIMESTAMP(6) (and H2 in MySQL mode) stores microseconds, + // so a nano-precision now() round-trips lossily. On Linux CI Instant.now() carries nanos; + // truncating here keeps the watermark comparison exact across platforms. 
+ Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); int transitioned = repository.updateBatch( List.of(idA, idB, idC),