From 2119555c2fe7748f704dfdf245fb32921349ba52 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:00:58 -0700 Subject: [PATCH 01/81] =?UTF-8?q?feat(optimizer):=20add=20data=20model=20?= =?UTF-8?q?=E2=80=94=20schema,=20entities,=20DTOs,=20converters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the optimizer service module with: - MySQL/H2 schema for table_operations, table_stats, table_stats_history, and table_operations_history - JPA entities with JSON column support (vladmihalcea hibernate-types) - All model/DTO/enum types: OperationType, OperationStatus, TableStats, CompleteOperationRequest, JobResult, OperationMetrics, etc. - JPA AttributeConverters for JobResult and OperationMetrics JSON columns - MapStruct mapper (OptimizerMapper) for entity→DTO conversion - Spring Boot application shell and build wiring (settings.gradle, build.gradle dockerPrereqs) No repositories, controllers, or service layer yet — those follow in subsequent PRs. 
Co-Authored-By: Claude Opus 4.6 --- build.gradle | 3 + services/optimizer/build.gradle | 17 ++++ .../OptimizerServiceApplication.java | 13 +++ .../optimizer/api/mapper/OptimizerMapper.java | 32 ++++++ .../api/model/CompleteOperationRequest.java | 31 ++++++ .../optimizer/api/model/JobResult.java | 25 +++++ .../api/model/OperationHistoryStatus.java | 7 ++ .../optimizer/api/model/OperationMetrics.java | 24 +++++ .../optimizer/api/model/OperationStatus.java | 21 ++++ .../optimizer/api/model/OperationType.java | 12 +++ .../api/model/TableOperationsDto.java | 40 ++++++++ .../api/model/TableOperationsHistoryDto.java | 43 ++++++++ .../optimizer/api/model/TableStats.java | 48 +++++++++ .../optimizer/api/model/TableStatsDto.java | 23 +++++ .../api/model/TableStatsHistoryDto.java | 22 +++++ .../model/UpsertTableOperationsRequest.java | 26 +++++ .../api/model/UpsertTableStatsRequest.java | 25 +++++ .../optimizer/config/JobResultConverter.java | 39 ++++++++ .../config/OperationMetricsConverter.java | 44 +++++++++ .../entity/TableOperationsHistoryRow.java | 91 +++++++++++++++++ .../optimizer/entity/TableOperationsRow.java | 99 +++++++++++++++++++ .../entity/TableStatsHistoryRow.java | 64 ++++++++++++ .../optimizer/entity/TableStatsRow.java | 57 +++++++++++ .../optimizer/entity/package-info.java | 2 + .../src/main/resources/application.properties | 20 ++++ .../main/resources/db/optimizer-schema.sql | 53 ++++++++++ .../resources/application-test.properties | 12 +++ settings.gradle | 1 + 28 files changed, 894 insertions(+) create mode 100644 services/optimizer/build.gradle create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java create mode 100644 services/optimizer/src/main/resources/application.properties create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/build.gradle b/build.gradle index 4699ca592..4cfac4a5d 100644 --- a/build.gradle +++ b/build.gradle @@ -157,6 +157,7 @@ tasks.register('CopyGitHooksTask', Copy) { // tables-service.Dockerfile -> :services:tables:bootJar // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar +// optimizer-service.Dockerfile -> :services:optimizer:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -176,6 +177,7 @@ tasks.register('dockerPrereqs') { dependsOn ':services:tables:bootJar' dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' + dependsOn ':services:optimizer:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -196,6 +198,7 @@ tasks.register('dockerPrereqs') { println ' build/tables/libs/tables.jar' println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' + println ' build/optimizer/libs/optimizer.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' 
build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle new file mode 100644 index 000000000..c05c7f9c3 --- /dev/null +++ b/services/optimizer/build.gradle @@ -0,0 +1,17 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' +} + +test { + useJUnitPlatform() +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java new file mode 100644 index 000000000..38eb363a8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +/** Spring Boot entry point for the Optimizer Service. 
*/ +@SpringBootApplication +public class OptimizerServiceApplication { + + public static void main(String[] args) { + SpringApplication.run(OptimizerServiceApplication.class, args); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java new file mode 100644 index 000000000..8c0b17462 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.api.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import org.mapstruct.Mapper; + +/** + * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. + * + *
<p>
Generated at compile time as a Spring bean; inject via {@code @Autowired} or the constructor. + */ +@Mapper(componentModel = "spring") +public interface OptimizerMapper { + + /** Map a {@link TableOperationsRow} to its DTO. */ + TableOperationsDto toDto(TableOperationsRow row); + + /** Map a {@link TableOperationsHistoryRow} to its DTO. */ + TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); + + /** Map a {@link TableStatsRow} to its DTO. */ + TableStatsDto toDto(TableStatsRow row); + + /** Map a {@link TableStatsHistoryRow} to its DTO. */ + TableStatsHistoryDto toDto(TableStatsHistoryRow row); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java new file mode 100644 index 000000000..c26893197 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -0,0 +1,31 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code POST /v1/table-operations/{id}/complete}. + * + *
<p>
Reports the outcome of a completed operation. The backend looks up the operation row by {@code + * id} and writes a history entry with the operation's table metadata and the supplied result. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class CompleteOperationRequest { + + /** Outcome of the operation. */ + private OperationHistoryStatus status; + + /** Error details on failure; {@code null} on success. */ + private JobResult result; + + /** Number of orphan files deleted; set by OFD Spark app on success. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java new file mode 100644 index 000000000..74942243c --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Result payload for a completed Spark maintenance job. + * + *
<p>
Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields + * are {@code null} on success; populated on failure. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java new file mode 100644 index 000000000..791d910a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Terminal states for a completed Spark maintenance job. */ +public enum OperationHistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java new file mode 100644 index 000000000..d6f788fcc --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Denormalized stats snapshot captured by the Analyzer at analysis time. + * + *
<p>
Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are + * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, + * not cumulative totals. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class OperationMetrics { + + private Long tableSizeBytes; + private Integer numFilesAdded; + private Integer numFilesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java new file mode 100644 index 000000000..c97be441b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Lifecycle states for a table operation recommendation. */ +public enum OperationStatus { + + /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ + PENDING, + + /** Claimed by the Scheduler; waiting for the Jobs Service to return a job ID. */ + SCHEDULING, + + /** Job submitted to the Jobs Service; the row now carries a {@code jobId}. */ + SCHEDULED, + + /** + * Marked by the Scheduler when it detects duplicate PENDING rows for the same {@code (table_uuid, + * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED + * before the claim step. 
+ */ + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java new file mode 100644 index 000000000..05e4a1e7b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** + * Maintenance operation types supported by the continuous optimizer. + * + *
<p>
Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as + * they are built out. + */ +public enum OperationType { + /** Removes orphaned data files no longer referenced by table metadata. */ + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java new file mode 100644 index 000000000..5eb5eaaa6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -0,0 +1,40 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsDto { + + /** Client-generated UUID identifying this specific operation recommendation. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** Any {@link OperationStatus} value. Defaults to {@code PENDING} on creation. */ + private OperationStatus status; + + /** Server-set when the row is first created by the Analyzer. */ + private Instant createdAt; + + /** Set by the Scheduler when claiming; {@code null} while PENDING. */ + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + private String jobId; + + /** Denormalized stats snapshot captured at analysis time. 
*/ + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java new file mode 100644 index 000000000..7dca34271 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations_history} — append-only Spark job results. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistoryDto { + + /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** When the Spark job was submitted / ran. */ + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + private OperationHistoryStatus status; + + /** Spark job ID. */ + private String jobId; + + /** Job result payload; both fields null on success. */ + private JobResult result; + + /** Number of orphan files deleted; null for non-OFD operations or before completion. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..cb77d994f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,48 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java new file mode 100644 index 000000000..1663d5ab0 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats} — used for response payloads. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsDto { + + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java new file mode 100644 index 000000000..142f00245 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -0,0 +1,22 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats_history} — used for response payloads. 
*/ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistoryDto { + + private Long id; + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java new file mode 100644 index 000000000..19dd1baac --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * PUT request body for {@code /v1/table-operations/{id}}. + * + *
<p>
The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all + * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, + * updates {@code metrics} on subsequent calls with the same {@code id}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableOperationsRequest { + + private String tableUuid; + private String databaseName; + private String tableName; + private OperationType operationType; + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java new file mode 100644 index 000000000..3214a85a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code PUT /v1/table-stats/{tableUuid}}. + * + *
<p>
{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + * denormalized display columns carried in the body. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableStatsRequest { + + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java new file mode 100644 index 000000000..4c9bfbe76 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java @@ -0,0 +1,39 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ +@Converter +public class JobResultConverter implements AttributeConverter<JobResult, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(JobResult attribute) { + if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } + + @Override + public JobResult convertToEntityAttribute(String dbData) { + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, JobResult.class); + } catch (IOException e) { + throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java new file mode 100644 index 000000000..27f0882f5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java @@ -0,0 +1,44 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** + * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. + */ +@Converter +public class OperationMetricsConverter implements AttributeConverter<OperationMetrics, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(OperationMetrics attribute) { + // Null metrics are valid for PENDING operations that have not yet produced output. 
+ if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); + } + } + + @Override + public OperationMetrics convertToEntityAttribute(String dbData) { + // Null is stored for PENDING rows; return null so the entity reflects that state. + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to deserialize OperationMetrics from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java new file mode 100644 index 000000000..6a47b5022 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.JobResultConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed Spark maintenance job. + * + *
<p>
Written by the Spark app after each table's operation finishes. The {@code id} is the same + * UUID as the originating {@code table_operations.id}, tying each history entry directly back to + * the specific operation cycle that produced it. Multiple runs of the same operation on the same + * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_job_id", columnList = "job_id") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + /** When the Spark job was submitted / ran, as reported by the job itself. */ + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationHistoryStatus status; + + /** Spark job ID; indexed for job → result lookups. 
*/ + @Column(name = "job_id", length = 255) + private String jobId; + + /** Job result: error details on failure, both fields null on success. */ + @Convert(converter = JobResultConverter.class) + @Column(name = "result") + private JobResult result; + + /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ + @Column(name = "orphan_files_deleted") + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. */ + @Column(name = "orphan_bytes_deleted") + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java new file mode 100644 index 000000000..9d835aa20 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -0,0 +1,99 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code + * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). + * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, + * operation_type)} at a time. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + /** Client-generated UUID identifying this specific operation recommendation. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} + * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + */ + @Column(name = "version") + private Long version; + + /** + * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file + * counts as of the moment the Analyzer ran. + */ + @Convert(converter = OperationMetricsConverter.class) + @Column(name = "metrics") + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..85d97a5eb --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,64 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import 
org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", nullable = false) + private Long id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..71d6a9421 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,57 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; 
+import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java new file mode 100644 index 000000000..7c0ca1f67 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java @@ -0,0 +1,2 @@ +/** JPA entities for the optimizer service. 
*/ +package com.linkedin.openhouse.optimizer.entity; diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..53062c5ad --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,53 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
+CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + metrics TEXT, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id BIGINT NOT NULL AUTO_INCREMENT, + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + submitted_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + job_id VARCHAR(255), + result TEXT, + orphan_files_deleted INT, + orphan_bytes_deleted BIGINT, + PRIMARY KEY (id) +); diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none 
+spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql diff --git a/settings.gradle b/settings.gradle index 035e54349..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -49,6 +49,7 @@ include ':libs:datalayout' include ':services:common' include ':services:housetables' include ':services:jobs' +include ':services:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 3c93d52f21ce82cc01ae37fef8ca5c1dba2522e1 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:35:45 -0700 Subject: [PATCH 02/81] fix: address PR review feedback on optimizer data model - Remove OperationMetrics class and converter; stats are read directly from table_stats instead of duplicating into operations - Remove orphanFilesDeleted/orphanBytesDeleted from history entity, DTO, and schema; operation-specific data belongs in the result JSON - Add addedSizeBytes to CommitDelta for tracking write volume - Fix OperationType javadoc to describe current state, not roadmap - Fix TableOperationsHistoryRow javadoc: written on operation complete, not by Spark app directly - Add field comments to all DTOs and request objects Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/OperationMetrics.java | 24 ---------- .../optimizer/api/model/OperationType.java | 7 +-- .../api/model/TableOperationsDto.java | 9 +++- .../api/model/TableOperationsHistoryDto.java | 12 ++--- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 11 +++++ .../api/model/TableStatsHistoryDto.java | 11 +++++ .../model/UpsertTableOperationsRequest.java | 11 +++-- 
.../api/model/UpsertTableStatsRequest.java | 7 +++ .../config/OperationMetricsConverter.java | 44 ------------------- .../entity/TableOperationsHistoryRow.java | 20 +++------ .../optimizer/entity/TableOperationsRow.java | 10 ++--- .../main/resources/db/optimizer-schema.sql | 2 - 13 files changed, 58 insertions(+), 111 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java deleted file mode 100644 index d6f788fcc..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Denormalized stats snapshot captured by the Analyzer at analysis time. - * - *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are - * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, - * not cumulative totals. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class OperationMetrics { - - private Long tableSizeBytes; - private Integer numFilesAdded; - private Integer numFilesDeleted; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 05e4a1e7b..8507bae12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -1,11 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; -/** - * Maintenance operation types supported by the continuous optimizer. - * - *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as - * they are built out. - */ +/** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 5eb5eaaa6..9c33d8907 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -19,8 +19,13 @@ public class TableOperationsDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display; not part of the primary key. */ private String databaseName; + + /** Denormalized table name for display; not part of the primary key. */ private String tableName; + + /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ @@ -35,6 +40,6 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - /** Denormalized stats snapshot captured at analysis time. */ - private OperationMetrics metrics; + /** Reserved for future per-operation metadata; currently unused. 
*/ + private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 7dca34271..efc9bebbb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -6,7 +6,7 @@ import lombok.Data; import lombok.NoArgsConstructor; -/** DTO for {@code table_operations_history} — append-only Spark job results. */ +/** DTO for {@code table_operations_history} — append-only operation results. */ @Data @Builder @NoArgsConstructor @@ -23,21 +23,15 @@ public class TableOperationsHistoryDto { private String tableName; private OperationType operationType; - /** When the Spark job was submitted / ran. */ + /** When the operation completed, as recorded by the complete endpoint. */ private Instant submittedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; - /** Spark job ID. */ + /** Job ID from the Jobs Service. */ private String jobId; /** Job result payload; both fields null on success. */ private JobResult result; - - /** Number of orphan files deleted; null for non-OFD operations or before completion. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index cb77d994f..51aa8a712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -43,6 +43,7 @@ public static class SnapshotMetrics { public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 1663d5ab0..a668af434 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -14,10 +14,21 @@ @AllArgsConstructor public class TableStatsDto { + /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload, stored as JSON. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** When this row was last written. Used for staleness monitoring. 
*/ private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 142f00245..0604e07de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,10 +13,21 @@ @AllArgsConstructor public class TableStatsHistoryDto { + /** Auto-increment primary key. */ private Long id; + + /** Stable Iceberg table UUID. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Snapshot + delta stats from this commit event. */ private TableStats stats; + + /** When this history row was recorded. */ private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java index 19dd1baac..21174c337 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -9,8 +9,7 @@ * PUT request body for {@code /v1/table-operations/{id}}. * *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, - * updates {@code metrics} on subsequent calls with the same {@code id}. + * table-identifying fields in this body. The service creates the row on first call. */ @Data @Builder @@ -18,9 +17,15 @@ @AllArgsConstructor public class UpsertTableOperationsRequest { + /** Stable Iceberg table UUID identifying the target table. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; + + /** The type of maintenance operation to create. */ private OperationType operationType; - private OperationMetrics metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 3214a85a6..721c3deaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -18,8 +18,15 @@ @AllArgsConstructor public class UpsertTableStatsRequest { + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload from this commit. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java deleted file mode 100644 index 27f0882f5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** - * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. - */ -@Converter -public class OperationMetricsConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(OperationMetrics attribute) { - // Null metrics are valid for PENDING operations that have not yet produced output. - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); - } - } - - @Override - public OperationMetrics convertToEntityAttribute(String dbData) { - // Null is stored for PENDING rows; return null so the entity reflects that state. 
- if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to deserialize OperationMetrics from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6a47b5022..e7493024c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -21,12 +21,12 @@ import lombok.NoArgsConstructor; /** - * Append-only record of a completed Spark maintenance job. + * Append-only record of a completed maintenance operation. * - *

Written by the Spark app after each table's operation finishes. The {@code id} is the same - * UUID as the originating {@code table_operations.id}, tying each history entry directly back to - * the specific operation cycle that produced it. Multiple runs of the same operation on the same - * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating {@code table_operations.id}, tying each history entry back to the operation cycle + * that produced it. Multiple runs of the same operation on the same table produce multiple rows + * (each cycle gets a new UUID from the Analyzer). */ @Entity @Table( @@ -63,7 +63,7 @@ public class TableOperationsHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; - /** When the Spark job was submitted / ran, as reported by the job itself. */ + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "submitted_at", nullable = false) private Instant submittedAt; @@ -80,12 +80,4 @@ public class TableOperationsHistoryRow { @Convert(converter = JobResultConverter.class) @Column(name = "result") private JobResult result; - - /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ - @Column(name = "orphan_files_deleted") - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - @Column(name = "orphan_bytes_deleted") - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 9d835aa20..e5493b510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,9 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; @@ -90,10 +87,9 @@ public class TableOperationsRow { private Long version; /** - * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file - * counts as of the moment the Analyzer ran. + * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer + * reads stats directly from {@code table_stats} instead of duplicating them here. 
*/ - @Convert(converter = OperationMetricsConverter.class) @Column(name = "metrics") - private OperationMetrics metrics; + private String metrics; } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 53062c5ad..098380e7f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,7 +47,5 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - orphan_files_deleted INT, - orphan_bytes_deleted BIGINT, PRIMARY KEY (id) ); From d419eb31f0449b5893739391047cf1af013cc6e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:57:51 -0700 Subject: [PATCH 03/81] feat(optimizer): add repositories and repository tests Spring Data JPA repositories for all four optimizer tables with filtered query support. Includes tests exercising save/find, filtered queries, upsert semantics, and append-only history. 
Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 60 ++++++ .../repository/TableOperationsRepository.java | 33 +++ .../TableStatsHistoryRepository.java | 41 ++++ .../repository/TableStatsRepository.java | 25 +++ .../OptimizerServiceContextTest.java | 19 ++ .../TableOperationsHistoryRepositoryTest.java | 189 ++++++++++++++++++ .../TableOperationsRepositoryTest.java | 135 +++++++++++++ .../TableStatsHistoryRepositoryTest.java | 127 ++++++++++++ .../repository/TableStatsRepositoryTest.java | 141 +++++++++++++ 9 files changed, 770 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java new file mode 
100644 index 000000000..2ba5bdf7a --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -0,0 +1,60 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is client-supplied {@code id}. + */ +@Repository +public interface TableOperationsHistoryRepository + extends JpaRepository { + + /** + * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. + * + * @param tableUuid the stable table UUID + * @param limit maximum number of rows to return + * @return history rows ordered by {@code submitted_at} descending + */ + @Query( + value = + "SELECT * FROM table_operations_history " + + "WHERE table_uuid = :tableUuid " + + "ORDER BY submitted_at DESC LIMIT :limit", + nativeQuery = true) + List find( + @Param("tableUuid") String tableUuid, @Param("limit") int limit); + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "AND (:until IS NULL OR r.submittedAt <= :until) " + + "ORDER BY r.submittedAt DESC") + List findFiltered( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + @Param("operationType") OperationType operationType, + @Param("status") OperationHistoryStatus status, + @Param("since") Instant since, + @Param("until") Instant until, + Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..69476991f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. 
*/ +@Repository +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. + */ + @Query( + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..c6ec3befd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. 
+ * + * @param tableUuid the stable table UUID + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.recordedAt DESC") + List findByTableUuid( + @Param("tableUuid") String tableUuid, Pageable pageable); + + /** + * Return history rows for a table recorded at or after {@code since}, newest first. + * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND r.recordedAt >= :since " + + "ORDER BY r.recordedAt DESC") + List findByTableUuidSince( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6c071cf5b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java new file mode 100644 index 000000000..abb89ec42 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -0,0 +1,19 @@ +package com.linkedin.openhouse.optimizer; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; + +/** + * Validates that the Spring application context loads successfully against the H2 schema. This test + * exercises schema-SQL-init, JPA entity scanning, and repository wiring. + */ +@SpringBootTest +@ActiveProfiles("test") +class OptimizerServiceContextTest { + + @Test + void contextLoads() { + // Context load is the assertion — no additional assertions needed. 
+ } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java new file mode 100644 index 000000000..9bde34334 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -0,0 +1,189 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsHistoryRepositoryTest { + + @Autowired TableOperationsHistoryRepository repository; + + @Test + void appendAndFindByTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t1) + .status(OperationHistoryStatus.SUCCESS) + .jobId("job-001") + .build()); + + repository.save( + 
TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t2) + .status(OperationHistoryStatus.FAILED) + .jobId("job-002") + .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .build()); + + List rows = repository.find(tableUuid, 10); + + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); + assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + } + + @Test + void appendIsNonDestructive_multipleRunsRetained() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 3; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 10); + assertThat(rows).hasSize(3); + } + + @Test + void find_respectsLimit() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 5; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl3") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 3); + assertThat(rows).hasSize(3); + } + + @Test + void findFiltered_noParams_returnsAll() { + Instant now = Instant.now(); + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(uuid1) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid2) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(1)) + .status(OperationHistoryStatus.FAILED) + .build()); + + List rows = + repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + } + + @Test + void findFiltered_byStatusAndTimeWindow() { + Instant old = Instant.parse("2024-01-01T00:00:00Z"); + Instant recent = Instant.parse("2024-06-01T00:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(old) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(recent) + .status(OperationHistoryStatus.FAILED) + .build()); + + // Filter by status + List failed = + repository.findFiltered( + null, + null, + null, + null, + OperationHistoryStatus.FAILED, + null, + null, + PageRequest.of(0, 100)); + assertThat(failed).hasSize(1); + assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + + // Filter by time window + Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); + List afterCutoff = + repository.findFiltered(null, null, null, null, null, cutoff, null, 
PageRequest.of(0, 100)); + assertThat(afterCutoff).hasSize(1); + assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java new file mode 100644 index 000000000..d7b8ee0b8 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -0,0 +1,135 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsRepositoryTest { + + @Autowired TableOperationsRepository repository; + + @Test + void saveAndFindById() { + String id = UUID.randomUUID().toString(); + + TableOperationsRow row = + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + + repository.save(row); + + Optional found = repository.findById(id); + assertThat(found).isPresent(); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + } + + @Test + 
void findFiltered_noParams_returnsAll() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List rows = repository.findFiltered(null, null, null, null, null); + assertThat(rows).hasSize(2); + } + + @Test + void findFiltered_byStatus() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List pending = + repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + assertThat(pending).hasSize(1); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + + List scheduled = + repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + assertThat(scheduled).hasSize(1); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void findFiltered_byDatabaseAndTable() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java new file mode 100644 index 000000000..fb86762dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -0,0 +1,127 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class 
TableStatsHistoryRepositoryTest { + + @Autowired TableStatsHistoryRepository repository; + + @Test + void saveAndFindByTableUuid() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + + assertThat(rows).hasSize(3); + // newest first + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + } + + @Test + void findByTableUuid_respectsLimit() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + for (int i = 0; i < 5; i++) { + repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); + } + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + + assertThat(rows).hasSize(3); + } + + @Test + void findByTableUuidSince_filtersOlderRows() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + Instant cutoff = now.minus(90, ChronoUnit.MINUTES); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = + repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + + // only the 2 rows within the last 90 minutes + assertThat(rows).hasSize(2); + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + } + + @Test + void findByTableUuid_isolatesByTableUuid() { + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = 
UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); + repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); + + assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + } + + @Test + void autoIncrementId() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + + assertThat(row1.getId()).isNotNull(); + assertThat(row2.getId()).isNotNull(); + assertThat(row2.getId()).isGreaterThan(row1.getId()); + } + + private static TableStatsHistoryRow buildRow( + String tableUuid, + String databaseId, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return TableStatsHistoryRow.builder() + .tableUuid(tableUuid) + .databaseId(databaseId) + .tableName(tableName) + .stats( + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder() + .clusterId("cl1") + .tableSizeBytes(1024L) + .build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .build()) + .build()) + .recordedAt(recordedAt) + .build(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java new file mode 100644 index 000000000..5efb49148 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import 
com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableStatsRepositoryTest { + + @Autowired TableStatsRepository repository; + + @Test + void saveAndFindById() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) + .build(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .updatedAt(Instant.now()) + .build()); + + Optional found = repository.findById(tableUuid); + assertThat(found).isPresent(); + assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + } + + @Test + void upsert_overwritesPreviousStats() { + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + repository.save( + TableStatsRow.builder() 
+ .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findAll()).hasSize(1); + assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + .isEqualTo(200L); + } + + @Test + void findFiltered_noParams_returnsAll() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, null)).hasSize(2); + } + + @Test + void findFiltered_byDatabase() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered("db1", null, null)).hasSize(1); + assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + } +} From 7ff3b4360877580f395650223c19542849a5e1f7 Mon Sep 17 00:00:00 2001 
From: mkuchenbecker Date: Mon, 6 Apr 2026 11:35:45 -0700 Subject: [PATCH 04/81] =?UTF-8?q?fix:=20consolidate=20repo=20methods=20?= =?UTF-8?q?=E2=80=94=20single=20find=20with=20optional=20filters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review comments: rename findFiltered → find across all repos, remove redundant findByTableUuid/findByTableUuidSince from history repos, add explicit assertion to context test. Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 18 +-------------- .../repository/TableOperationsRepository.java | 2 +- .../TableStatsHistoryRepository.java | 22 +++++-------------- .../repository/TableStatsRepository.java | 2 +- .../OptimizerServiceContextTest.java | 8 ++++++- .../TableOperationsHistoryRepositoryTest.java | 19 +++++++++------- .../TableOperationsRepositoryTest.java | 18 +++++++-------- .../TableStatsHistoryRepositoryTest.java | 19 ++++++++-------- .../repository/TableStatsRepositoryTest.java | 10 ++++----- 9 files changed, 49 insertions(+), 69 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 2ba5bdf7a..71ab1cde4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,22 +18,6 @@ public interface TableOperationsHistoryRepository extends JpaRepository { - /** - * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. 
- * - * @param tableUuid the stable table UUID - * @param limit maximum number of rows to return - * @return history rows ordered by {@code submitted_at} descending - */ - @Query( - value = - "SELECT * FROM table_operations_history " - + "WHERE table_uuid = :tableUuid " - + "ORDER BY submitted_at DESC LIMIT :limit", - nativeQuery = true) - List find( - @Param("tableUuid") String tableUuid, @Param("limit") int limit); - /** * Return history rows matching the given filters, ordered by {@code submittedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. @@ -48,7 +32,7 @@ List find( + "AND (:since IS NULL OR r.submittedAt >= :since) " + "AND (:until IS NULL OR r.submittedAt <= :until) " + "ORDER BY r.submittedAt DESC") - List findFiltered( + List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 69476991f..891322134 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -24,7 +24,7 @@ public interface TableOperationsRepository extends JpaRepository findFiltered( + List find( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("databaseName") String databaseName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index c6ec3befd..767d60c22 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -12,30 +12,18 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. * * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip * @param pageable use {@code PageRequest.of(0, limit)} to cap results */ @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List findByTableUuid( - @Param("tableUuid") String tableUuid, Pageable pageable); - - /** - * Return history rows for a table recorded at or after {@code since}, newest first. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND r.recordedAt >= :since " - + "ORDER BY r.recordedAt DESC") - List findByTableUuidSince( + List find( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6c071cf5b..ecae70feb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -18,7 +18,7 @@ public interface TableStatsRepository extends JpaRepository findFiltered( + List find( @Param("databaseId") String databaseId, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java index abb89ec42..fa373c57d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -1,7 +1,11 @@ package com.linkedin.openhouse.optimizer; +import static org.assertj.core.api.Assertions.assertThat; + import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.ApplicationContext; import 
org.springframework.test.context.ActiveProfiles; /** @@ -12,8 +16,10 @@ @ActiveProfiles("test") class OptimizerServiceContextTest { + @Autowired ApplicationContext context; + @Test void contextLoads() { - // Context load is the assertion — no additional assertions needed. + assertThat(context).isNotNull(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 9bde34334..1a35a8fda 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -54,7 +54,8 @@ void appendAndFindByTableUuid() { .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) .build()); - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); // Newest first @@ -79,7 +80,8 @@ void appendIsNonDestructive_multipleRunsRetained() { .build()); } - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(3); } @@ -100,12 +102,13 @@ void find_respectsLimit() { .build()); } - List rows = repository.find(tableUuid, 3); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { Instant now = Instant.now(); String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); @@ -132,14 +135,14 @@ void findFiltered_noParams_returnsAll() { .build()); List rows = - 
repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); } @Test - void findFiltered_byStatusAndTimeWindow() { + void find_byStatusAndTimeWindow() { Instant old = Instant.parse("2024-01-01T00:00:00Z"); Instant recent = Instant.parse("2024-06-01T00:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -167,7 +170,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by status List failed = - repository.findFiltered( + repository.find( null, null, null, @@ -182,7 +185,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = - repository.findFiltered(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index d7b8ee0b8..b1342b12d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -45,7 +45,7 @@ void saveAndFindById() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -67,12 +67,12 @@ void findFiltered_noParams_returnsAll() { .createdAt(Instant.now()) .build()); - List rows = 
repository.findFiltered(null, null, null, null, null); + List rows = repository.find(null, null, null, null, null); assertThat(rows).hasSize(2); } @Test - void findFiltered_byStatus() { + void find_byStatus() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -95,18 +95,18 @@ void findFiltered_byStatus() { .build()); List pending = - repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findFiltered_byDatabaseAndTable() { + void find_byDatabaseAndTable() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -128,8 +128,8 @@ void findFiltered_byDatabaseAndTable() { .createdAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index fb86762dc..a76c7155d 100644 --- 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -23,7 +23,7 @@ class TableStatsHistoryRepositoryTest { @Autowired TableStatsHistoryRepository repository; @Test - void saveAndFindByTableUuid() { + void saveAndFind() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -31,7 +31,7 @@ void saveAndFindByTableUuid() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -40,7 +40,7 @@ void saveAndFindByTableUuid() { } @Test - void findByTableUuid_respectsLimit() { + void find_respectsLimit() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -48,13 +48,13 @@ void findByTableUuid_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findByTableUuidSince_filtersOlderRows() { + void find_withSince_filtersOlderRows() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); Instant cutoff = now.minus(90, ChronoUnit.MINUTES); @@ -63,8 +63,7 @@ void findByTableUuidSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = - repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = 
repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -72,7 +71,7 @@ void findByTableUuidSince_filtersOlderRows() { } @Test - void findByTableUuid_isolatesByTableUuid() { + void find_isolatesByTableUuid() { String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -80,8 +79,8 @@ void findByTableUuid_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5efb49148..a8ac1cbbb 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -83,7 +83,7 @@ void upsert_overwritesPreviousStats() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) @@ -107,11 +107,11 @@ void findFiltered_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, null)).hasSize(2); + assertThat(repository.find(null, null, null)).hasSize(2); } @Test - void findFiltered_byDatabase() { + void find_byDatabase() { repository.save( TableStatsRow.builder() 
.tableUuid(UUID.randomUUID().toString()) @@ -135,7 +135,7 @@ void findFiltered_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered("db1", null, null)).hasSize(1); - assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null)).hasSize(1); + assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); } } From f7f6812639a9a478d6abe9f003f17464af1f80d0 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:59:15 -0700 Subject: [PATCH 05/81] feat(optimizer): add REST service layer, controllers, and shared module Service interface and implementation for all optimizer CRUD operations including complete-operation lifecycle, stats upsert with history double-write, and filtered queries. Three REST controllers expose the endpoints. The apps/optimizer shared module provides lightweight entity/repo copies for the analyzer and scheduler apps. 
Co-Authored-By: Claude Opus 4.6 --- apps/optimizer/build.gradle | 13 ++ .../entity/TableOperationHistoryRow.java | 37 ++++ .../optimizer/entity/TableOperationRow.java | 55 +++++ .../optimizer/entity/TableStatsRow.java | 53 +++++ .../openhouse/optimizer/model/TableStats.java | 45 ++++ .../TableOperationHistoryRepository.java | 23 ++ .../repository/TableOperationsRepository.java | 75 +++++++ .../repository/TableStatsRepository.java | 26 +++ .../controller/TableOperationsController.java | 66 ++++++ .../TableOperationsHistoryController.java | 60 ++++++ .../api/controller/TableStatsController.java | 69 ++++++ .../api/model/CompleteOperationRequest.java | 6 - .../service/OptimizerDataService.java | 98 +++++++++ .../service/OptimizerDataServiceImpl.java | 202 ++++++++++++++++++ settings.gradle | 1 + 15 files changed, 823 insertions(+), 6 deletions(-) create mode 100644 apps/optimizer/build.gradle create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the 
optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..a9434b4b7 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Returns all history rows for an operation type, newest first. Loaded once per analysis run and + * grouped in memory by {@code tableUuid} to eliminate per-table N+1 queries in the circuit + * breaker check. 
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE r.operationType = :opType " + + "ORDER BY r.submittedAt DESC") + List findAllByOperationType(@Param("opType") String operationType); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..404aaf873 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,75 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Returns rows for the given operation type whose status is in {@code statuses}. Used by the + * Scheduler to load all PENDING rows in one query. + */ + @Query( + "SELECT r FROM TableOperationRow r WHERE r.operationType = :type" + + " AND r.status IN :statuses") + List findByTypeAndStatuses( + @Param("type") String operationType, @Param("statuses") Collection statuses); + + /** + * Returns all rows for the given operation type regardless of status. Used by the Analyzer to + * find the most recent row per table_uuid for scheduling decisions. 
+ */ + @Query("SELECT r FROM TableOperationRow r WHERE r.operationType = :type") + List findByType(@Param("type") String operationType); + + /** + * Cancel older duplicate PENDING rows for the same (table_uuid, operation_type), keeping only the + * row identified by {@code keepId}. Called by the Scheduler before claiming to prevent duplicate + * job submissions from concurrent Analyzer runs. + * + * @return the number of rows marked CANCELED + */ + @Modifying + @Query( + "UPDATE TableOperationRow r SET r.status = 'CANCELED' " + + "WHERE r.tableUuid = :tableUuid AND r.operationType = :opType " + + "AND r.status = 'PENDING' AND r.id != :keepId") + int cancelDuplicatePending( + @Param("tableUuid") String tableUuid, + @Param("opType") String operationType, + @Param("keepId") String keepId); + + /** + * Atomically claim a PENDING row by flipping its status to SCHEDULING. + * + *

* <p>The {@code version} guard prevents double-scheduling when multiple scheduler instances run + * concurrently. Returns 1 if the claim succeeded, 0 if the row is no longer PENDING or was + * already claimed by another instance. + */ + @Modifying(flushAutomatically = true, clearAutomatically = true) + @Query( + "UPDATE TableOperationRow r SET r.status = 'SCHEDULING', r.scheduledAt = :now," + + " r.version = r.version + 1 WHERE r.id = :id AND r.version = :version AND r.status = 'PENDING'") + int markScheduling( + @Param("id") String id, @Param("version") Long version, @Param("now") Instant now); + + /** + * Transition a SCHEDULING row to SCHEDULED after the Jobs Service returns a job ID. + * + * @return 1 if updated, 0 if not found or wrong version/status + */ + @Modifying(flushAutomatically = true, clearAutomatically = true) + @Query( + "UPDATE TableOperationRow r SET r.status = 'SCHEDULED', r.jobId = :jobId," + + " r.version = r.version + 1" + + " WHERE r.id = :id AND r.version = :version AND r.status = 'SCHEDULING'") + int markScheduled( + @Param("id") String id, @Param("version") Long version, @Param("jobId") String jobId); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..3c0ef40b8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.stream.Stream; +import javax.persistence.QueryHint; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.jpa.repository.QueryHints; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. 
*/ +public interface TableStatsRepository extends JpaRepository { + + /** + * Streams all rows as a JDBC cursor rather than buffering them in memory. The caller must consume + * the stream inside an active {@code @Transactional} method and close it when done. + * + *

{@code Integer.MIN_VALUE} is MySQL Connector/J's signal to enable row-by-row streaming + * instead of loading the full result set into the driver buffer. + */ + @Query("SELECT r FROM TableStatsRow r") + @QueryHints( + @QueryHint( + name = org.hibernate.jpa.QueryHints.HINT_FETCH_SIZE, + value = "" + Integer.MIN_VALUE)) + Stream streamAll(); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java new file mode 100644 index 000000000..d8ba13b11 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -0,0 +1,66 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** REST controller for {@code table_operations}. 
*/ +@RestController +@RequestMapping("/v1/table-operations") +@RequiredArgsConstructor +public class TableOperationsController { + + private final OptimizerDataService service; + + /** + * Report that an operation has completed. The backend looks up the operation row and writes a + * history entry with the operation's table metadata and the supplied result. Returns 201 Created + * with the history row, or 404 if the operation does not exist. + */ + @PostMapping("/{id}/complete") + public ResponseEntity completeOperation( + @PathVariable String id, @RequestBody CompleteOperationRequest request) { + return service + .completeOperation(id, request) + .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto)) + .orElse(ResponseEntity.notFound().build()); + } + + /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */ + @GetMapping("/{id}") + public ResponseEntity getTableOperation(@PathVariable String id) { + return service + .getTableOperation(id) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().build()); + } + + /** + * List operations matching the given filters. All parameters are optional — omit all to return + * every row. 
+ */ + @GetMapping + public ResponseEntity> listTableOperations( + @RequestParam(required = false) OperationType operationType, + @RequestParam(required = false) OperationStatus status, + @RequestParam(required = false) String databaseName, + @RequestParam(required = false) String tableName, + @RequestParam(required = false) String tableUuid) { + return ResponseEntity.ok( + service.listTableOperations(operationType, status, databaseName, tableName, tableUuid)); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java new file mode 100644 index 000000000..11c77a15d --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -0,0 +1,60 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.time.Instant; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** REST controller for {@code table_operations_history}. 
*/ +@RestController +@RequestMapping("/v1/table-operations-history") +@RequiredArgsConstructor +public class TableOperationsHistoryController { + + private final OptimizerDataService service; + + /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ + @PostMapping + public ResponseEntity appendHistory( + @RequestBody TableOperationsHistoryDto dto) { + return ResponseEntity.status(HttpStatus.CREATED).body(service.appendHistory(dto)); + } + + /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ + @GetMapping("/{tableUuid}") + public ResponseEntity> getHistory( + @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { + return ResponseEntity.ok(service.getHistory(tableUuid, limit)); + } + + /** + * List history rows matching the given filters, ordered newest first. All parameters are optional + * — omit all to return every row up to {@code limit}. + */ + @GetMapping + public ResponseEntity> listHistory( + @RequestParam(required = false) String databaseName, + @RequestParam(required = false) String tableName, + @RequestParam(required = false) String tableUuid, + @RequestParam(required = false) OperationType operationType, + @RequestParam(required = false) OperationHistoryStatus status, + @RequestParam(required = false) Instant since, + @RequestParam(required = false) Instant until, + @RequestParam(defaultValue = "100") int limit) { + return ResponseEntity.ok( + service.listHistory( + databaseName, tableName, tableUuid, operationType, status, since, until, limit)); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java new file mode 100644 index 000000000..d469586a2 --- /dev/null +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -0,0 +1,69 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.time.Instant; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PutMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** REST controller for managing per-table stats in the optimizer DB. */ +@RestController +@RequestMapping("/v1/table-stats") +@RequiredArgsConstructor +public class TableStatsController { + + private final OptimizerDataService service; + + /** + * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every + * Iceberg commit. Idempotent for the stats row; each call also appends a stats-history row. + */ + @PutMapping("/{tableUuid}") + public ResponseEntity upsertTableStats( + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { + return ResponseEntity.ok(service.upsertTableStats(tableUuid, request)); + } + + /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. 
*/ + @GetMapping("/{tableUuid}") + public ResponseEntity getTableStats(@PathVariable String tableUuid) { + return service + .getTableStats(tableUuid) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().build()); + } + + /** + * List stats rows matching the given filters. All parameters are optional — omit all to return + * every row. + */ + @GetMapping + public ResponseEntity> listTableStats( + @RequestParam(required = false) String databaseId, + @RequestParam(required = false) String tableName, + @RequestParam(required = false) String tableUuid) { + return ResponseEntity.ok(service.listTableStats(databaseId, tableName, tableUuid)); + } + + /** + * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by + * {@code since} (inclusive) and cap at {@code limit} rows. + */ + @GetMapping("/{tableUuid}/history") + public ResponseEntity> getStatsHistory( + @PathVariable String tableUuid, + @RequestParam(required = false) Instant since, + @RequestParam(defaultValue = "100") int limit) { + return ResponseEntity.ok(service.getStatsHistory(tableUuid, since, limit)); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index c26893197..35f7ba782 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -22,10 +22,4 @@ public class CompleteOperationRequest { /** Error details on failure; {@code null} on success. */ private JobResult result; - - /** Number of orphan files deleted; set by OFD Spark app on success. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. 
*/ - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java new file mode 100644 index 000000000..ce3120400 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -0,0 +1,98 @@ +package com.linkedin.openhouse.optimizer.service; + +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import java.time.Instant; +import java.util.List; +import java.util.Optional; + +/** Service interface for optimizer data operations. */ +public interface OptimizerDataService { + + // --- TableOperations --- + + /** + * List operations matching the given filters. Every parameter is optional — pass {@code null} to + * skip that filter. No filters returns all rows. + */ + List listTableOperations( + OperationType operationType, + OperationStatus status, + String databaseName, + String tableName, + String tableUuid); + + /** + * Complete an operation by writing a history entry. Looks up the operation row by {@code id}, + * copies its table metadata into a new history row, and saves it. Returns the history DTO, or + * empty if the operation does not exist. 
+ */ + Optional completeOperation( + String id, CompleteOperationRequest request); + + /** + * Return the operation row for {@code id} regardless of status, or empty if it does not exist. + * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes). + */ + Optional getTableOperation(String id); + + // --- TableStats --- + + /** + * Create or update the stats row for {@code tableUuid}. The same call overwrites the previous + * snapshot with the latest commit values; every call also appends a stats-history row. + */ + TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request); + + /** Return the stats row for {@code tableUuid}, or empty if none exists. */ + Optional getTableStats(String tableUuid); + + /** + * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to + * skip that filter. No filters returns all rows. + */ + List listTableStats(String databaseId, String tableName, String tableUuid); + + /** + * Return per-commit stats history for {@code tableUuid}, newest first. + * + * @param tableUuid the stable table UUID + * @param since if non-null, only return rows recorded at or after this instant + * @param limit maximum number of rows to return + */ + List getStatsHistory(String tableUuid, Instant since, int limit); + + // --- TableOperationsHistory --- + + /** Append a completed-job result record. */ + TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto); + + /** + * Return the most recent history rows for a table UUID, newest first. + * + * @param tableUuid the stable table UUID + * @param limit maximum number of rows to return + */ + List getHistory(String tableUuid, int limit); + + /** + * List history rows matching the given filters, ordered newest first. Every parameter is optional + * — pass {@code null} to skip that filter. No filters returns all rows up to {@code limit}. 
+ */ + List listHistory( + String databaseName, + String tableName, + String tableUuid, + OperationType operationType, + OperationHistoryStatus status, + Instant since, + Instant until, + int limit); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java new file mode 100644 index 000000000..dbc5f466b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -0,0 +1,202 @@ +package com.linkedin.openhouse.optimizer.service; + +import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.List; +import java.util.Optional; 
+import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import org.springframework.data.domain.PageRequest; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +/** Implementation of {@link OptimizerDataService}. */ +@Service +@RequiredArgsConstructor +public class OptimizerDataServiceImpl implements OptimizerDataService { + + private final TableOperationsRepository operationsRepository; + private final TableOperationsHistoryRepository historyRepository; + private final TableStatsRepository statsRepository; + private final TableStatsHistoryRepository statsHistoryRepository; + private final OptimizerMapper mapper; + + // --- TableOperations --- + + @Override + public List listTableOperations( + OperationType operationType, + OperationStatus status, + String databaseName, + String tableName, + String tableUuid) { + return operationsRepository + .findFiltered(operationType, status, databaseName, tableName, tableUuid).stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } + + @Override + @Transactional + public Optional completeOperation( + String id, CompleteOperationRequest request) { + return operationsRepository + .findById(id) + .map( + row -> { + TableOperationsHistoryRow historyRow = + TableOperationsHistoryRow.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(row.getOperationType()) + .submittedAt(Instant.now()) + .status(request.getStatus()) + .jobId(row.getJobId()) + .result(request.getResult()) + .build(); + return mapper.toDto(historyRepository.save(historyRow)); + }); + } + + @Override + public Optional getTableOperation(String id) { + return operationsRepository.findById(id).map(mapper::toDto); + } + + // --- TableStats --- + + @Override + @Transactional + public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) { + Instant now 
= Instant.now(); + TableStatsRow row = + statsRepository + .findById(tableUuid) + .map( + existing -> + existing + .toBuilder() + .databaseId(request.getDatabaseId()) + .tableName(request.getTableName()) + .stats(request.getStats()) + .tableProperties(request.getTableProperties()) + .updatedAt(now) + .build()) + .orElse( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId(request.getDatabaseId()) + .tableName(request.getTableName()) + .stats(request.getStats()) + .tableProperties(request.getTableProperties()) + .updatedAt(now) + .build()); + TableStatsDto saved = mapper.toDto(statsRepository.save(row)); + + statsHistoryRepository.save( + TableStatsHistoryRow.builder() + .tableUuid(tableUuid) + .databaseId(request.getDatabaseId()) + .tableName(request.getTableName()) + .stats(request.getStats()) + .recordedAt(now) + .build()); + + return saved; + } + + @Override + public Optional getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(mapper::toDto); + } + + @Override + public List listTableStats(String databaseId, String tableName, String tableUuid) { + return statsRepository.findFiltered(databaseId, tableName, tableUuid).stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } + + @Override + public List getStatsHistory(String tableUuid, Instant since, int limit) { + PageRequest page = PageRequest.of(0, limit); + if (since != null) { + return statsHistoryRepository.findByTableUuidSince(tableUuid, since, page).stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } + return statsHistoryRepository.findByTableUuid(tableUuid, page).stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } + + // --- TableOperationsHistory --- + + @Override + @Transactional + public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { + TableOperationsHistoryRow row = + TableOperationsHistoryRow.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) 
+ .tableName(dto.getTableName()) + .operationType(dto.getOperationType()) + .submittedAt(dto.getSubmittedAt() != null ? dto.getSubmittedAt() : Instant.now()) + .status(dto.getStatus()) + .jobId(dto.getJobId()) + .result(dto.getResult()) + .build(); + return mapper.toDto(historyRepository.save(row)); + } + + @Override + public List getHistory(String tableUuid, int limit) { + return historyRepository.find(tableUuid, limit).stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } + + @Override + public List listHistory( + String databaseName, + String tableName, + String tableUuid, + OperationType operationType, + OperationHistoryStatus status, + Instant since, + Instant until, + int limit) { + return historyRepository + .findFiltered( + databaseName, + tableName, + tableUuid, + operationType, + status, + since, + until, + PageRequest.of(0, limit)) + .stream() + .map(mapper::toDto) + .collect(Collectors.toList()); + } +} diff --git a/settings.gradle b/settings.gradle index cad06785e..0d64dad53 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,6 +50,7 @@ include ':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' +include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From ef3260f9303a692f218f9d72985c42da421da5d3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 11:37:07 -0700 Subject: [PATCH 06/81] fix: update service impl to use consolidated find methods Align OptimizerDataServiceImpl with renamed repository methods from optimizer-1 review feedback. 
Co-Authored-By: Claude Opus 4.6 --- .../service/OptimizerDataServiceImpl.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index dbc5f466b..629853156 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -46,8 +46,8 @@ public List listTableOperations( String databaseName, String tableName, String tableUuid) { - return operationsRepository - .findFiltered(operationType, status, databaseName, tableName, tableUuid).stream() + return operationsRepository.find(operationType, status, databaseName, tableName, tableUuid) + .stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -130,20 +130,14 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats(String databaseId, String tableName, String tableUuid) { - return statsRepository.findFiltered(databaseId, tableName, tableUuid).stream() + return statsRepository.find(databaseId, tableName, tableUuid).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @Override public List getStatsHistory(String tableUuid, Instant since, int limit) { - PageRequest page = PageRequest.of(0, limit); - if (since != null) { - return statsHistoryRepository.findByTableUuidSince(tableUuid, since, page).stream() - .map(mapper::toDto) - .collect(Collectors.toList()); - } - return statsHistoryRepository.findByTableUuid(tableUuid, page).stream() + return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -170,7 +164,8 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) 
{ @Override public List getHistory(String tableUuid, int limit) { - return historyRepository.find(tableUuid, limit).stream() + return historyRepository + .find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, limit)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -186,7 +181,7 @@ public List listHistory( Instant until, int limit) { return historyRepository - .findFiltered( + .find( databaseName, tableName, tableUuid, From ac1da013711ca3ac680bb24e48f3859813f099a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:09:53 -0700 Subject: [PATCH 07/81] feat(optimizer): add apps/optimizer shared module with find-only repos Shared JPA entities and repositories for optimizer apps (analyzer, scheduler). All repos expose a single find method with optional filters. Co-Authored-By: Claude Opus 4.6 --- apps/optimizer/build.gradle | 13 +++++ .../entity/TableOperationHistoryRow.java | 37 +++++++++++++ .../optimizer/entity/TableOperationRow.java | 55 +++++++++++++++++++ .../optimizer/entity/TableStatsRow.java | 53 ++++++++++++++++++ .../openhouse/optimizer/model/TableStats.java | 45 +++++++++++++++ .../TableOperationHistoryRepository.java | 32 +++++++++++ .../repository/TableOperationsRepository.java | 29 ++++++++++ .../repository/TableStatsRepository.java | 25 +++++++++ settings.gradle | 1 + 9 files changed, 290 insertions(+) create mode 100644 apps/optimizer/build.gradle create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java 
create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the 
optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..f2ea9e3c8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("operationType") String operationType, + @Param("tableUuid") String tableUuid, + @Param("status") String status, + @Param("since") Instant since, + Pageable pageable); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..27424dfdc --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName)") + List find( + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6effe19c2 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/settings.gradle b/settings.gradle index cad06785e..0d64dad53 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,6 +50,7 @@ include ':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' +include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 02a5ab31c62a0847e665f674b1fb3e8684bb3433 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:19:37 -0700 Subject: [PATCH 08/81] fix: remove orphan fields from CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields never belonged in the data model — remove them at the source rather than adding then deleting in a later PR. Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/CompleteOperationRequest.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index c26893197..35f7ba782 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -22,10 +22,4 @@ public class CompleteOperationRequest { /** Error details on failure; {@code null} on success. 
*/ private JobResult result; - - /** Number of orphan files deleted; set by OFD Spark app on success. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ - private Long orphanBytesDeleted; } From 01466c70cd4f7ad4f56db31897e23f681512a31a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:34:29 -0700 Subject: [PATCH 09/81] feat(optimizer): add service-layer integration tests H2 integration tests for OptimizerDataServiceImpl covering completeOperation (write history, not-found) and upsertTableStats (create, update, history append). Co-Authored-By: Claude Opus 4.6 --- .../service/OptimizerDataServiceImplTest.java | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java new file mode 100644 index 000000000..6e3194018 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -0,0 +1,159 @@ +package com.linkedin.openhouse.optimizer.service; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import 
com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class OptimizerDataServiceImplTest { + + @Autowired OptimizerDataService service; + @Autowired TableOperationsRepository operationsRepository; + @Autowired TableStatsRepository statsRepository; + @Autowired TableStatsHistoryRepository statsHistoryRepository; + + // --- completeOperation --- + + @Test + void completeOperation_writesHistoryFromOperationRow() { + String id = UUID.randomUUID().toString(); + String tableUuid = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("spark-job-123") + .build()); + + Optional result = + service.completeOperation( + id, CompleteOperationRequest.builder().status(OperationHistoryStatus.SUCCESS).build()); + + assertThat(result).isPresent(); + assertThat(result.get().getStatus()).isEqualTo(OperationHistoryStatus.SUCCESS); + assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); + 
assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); + assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); + assertThat(result.get().getDatabaseName()).isEqualTo("db1"); + assertThat(result.get().getSubmittedAt()).isNotNull(); + } + + @Test + void completeOperation_notFound_returnsEmpty() { + Optional result = + service.completeOperation( + UUID.randomUUID().toString(), + CompleteOperationRequest.builder() + .status(OperationHistoryStatus.FAILED) + .result( + JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) + .build()); + + assertThat(result).isEmpty(); + } + + // --- upsertTableStats --- + + @Test + void upsertTableStats_createsNewRow() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .build(); + + TableStatsDto dto = + service.upsertTableStats( + tableUuid, + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .build()); + + assertThat(dto.getTableUuid()).isEqualTo(tableUuid); + assertThat(dto.getDatabaseId()).isEqualTo("db1"); + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); + assertThat(statsRepository.findById(tableUuid)).isPresent(); + } + + @Test + void upsertTableStats_updatesExistingRow() { + String tableUuid = UUID.randomUUID().toString(); + UpsertTableStatsRequest first = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .build(); + UpsertTableStatsRequest second = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + 
TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .build(); + + service.upsertTableStats(tableUuid, first); + TableStatsDto dto = service.upsertTableStats(tableUuid, second); + + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + assertThat(statsRepository.findAll()).hasSize(1); + } + + @Test + void upsertTableStats_appendsHistoryOnEveryCall() { + String tableUuid = UUID.randomUUID().toString(); + UpsertTableStatsRequest request = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .build(); + + service.upsertTableStats(tableUuid, request); + service.upsertTableStats(tableUuid, request); + + assertThat(statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100))).hasSize(2); + } +} From ff07fde3cbfc8dd0cb2c2fde49748dc84ee6734c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:43:44 -0700 Subject: [PATCH 10/81] fix: assert stats history delta values in upsert test Strengthen upsertTableStats test to verify history rows contain the raw delta stats from each call, not just the row count. 
Co-Authored-By: Claude Opus 4.6 --- .../service/OptimizerDataServiceImplTest.java | 69 +++++++++---------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 6e3194018..244acb204 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -12,10 +12,12 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import java.time.Instant; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -110,50 +112,45 @@ void upsertTableStats_createsNewRow() { } @Test - void upsertTableStats_updatesExistingRow() { + void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - UpsertTableStatsRequest first = - UpsertTableStatsRequest.builder() - .databaseId("db1") - .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + TableStats firstStats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - 
UpsertTableStatsRequest second = - UpsertTableStatsRequest.builder() - .databaseId("db1") - .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + TableStats secondStats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); - service.upsertTableStats(tableUuid, first); - TableStatsDto dto = service.upsertTableStats(tableUuid, second); - - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); - assertThat(statsRepository.findAll()).hasSize(1); - } - - @Test - void upsertTableStats_appendsHistoryOnEveryCall() { - String tableUuid = UUID.randomUUID().toString(); - UpsertTableStatsRequest request = + service.upsertTableStats( + tableUuid, UpsertTableStatsRequest.builder() .databaseId("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) - .build(); + .stats(firstStats) + .build()); + TableStatsDto dto = + service.upsertTableStats( + tableUuid, + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats(secondStats) + .build()); - service.upsertTableStats(tableUuid, request); - service.upsertTableStats(tableUuid, request); + // Current row reflects the latest upsert + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + assertThat(statsRepository.findAll()).hasSize(1); - assertThat(statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100))).hasSize(2); + // History has one row per upsert with the raw delta from each call + List history = + statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); + assertThat(history).hasSize(2); + // Newest first + assertThat(history.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + 
assertThat(history.get(1).getStats().getDelta().getNumFilesAdded()).isEqualTo(5L); } } From f82d1b3ef3e0b1197487a68b851fa394ef9b9c7a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:14:18 -0700 Subject: [PATCH 11/81] fix(optimizer): address PR #527 review feedback - Widen-to-tighten: VARCHAR(255) -> VARCHAR(128) for database_name and table_name across all entities and the schema, aligning with prod conventions (can always be widened later, not tightened). - Rename databaseId -> databaseName in TableStatsRow, TableStatsHistoryRow, TableStatsDto, TableStatsHistoryDto, and UpsertTableStatsRequest for consistency with the operations entities and DTOs. - Drop the unused metrics field from TableOperationsRow, TableOperationsDto, and the schema. Add a TODO note in the schema that per-operation metric columns will be added as operations are onboarded. - Rename submittedAt -> completedAt in TableOperationsHistoryRow, TableOperationsHistoryDto, and the schema (column submitted_at -> completed_at, index idx_submitted_at -> idx_completed_at). The history row is written when the complete endpoint is called, so the timestamp captures completion; submission time is already on table_operations.scheduled_at. - Change TableStatsHistoryRow.id from BIGINT auto-increment to VARCHAR(36) UUID, set by the caller, matching the other id-bearing entities. - Add @JsonIgnoreProperties(ignoreUnknown = true) to CommitDelta for consistency with TableStats and SnapshotMetrics. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/model/TableOperationsDto.java | 3 --- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 2 +- .../api/model/TableStatsHistoryDto.java | 6 ++--- .../api/model/UpsertTableStatsRequest.java | 4 ++-- .../entity/TableOperationsHistoryRow.java | 10 ++++----- .../optimizer/entity/TableOperationsRow.java | 11 ++-------- .../entity/TableStatsHistoryRow.java | 13 +++++------ .../optimizer/entity/TableStatsRow.java | 6 ++--- .../main/resources/db/optimizer-schema.sql | 22 +++++++++---------- 11 files changed, 34 insertions(+), 46 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 9c33d8907..d41bd6906 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -39,7 +39,4 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - - /** Reserved for future per-operation metadata; currently unused. 
*/ - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index efc9bebbb..2a901ad2b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -24,7 +24,7 @@ public class TableOperationsHistoryDto { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ - private Instant submittedAt; + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 51aa8a712..64c99061a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -40,6 +40,7 @@ public static class SnapshotMetrics { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index a668af434..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -18,7 +18,7 @@ public class 
TableStatsDto { private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 0604e07de..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,14 +13,14 @@ @AllArgsConstructor public class TableStatsHistoryDto { - /** Auto-increment primary key. */ - private Long id; + /** UUID primary key set by the caller. */ + private String id; /** Stable Iceberg table UUID. */ private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 721c3deaf..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -9,7 +9,7 @@ /** * Request body for {@code PUT /v1/table-stats/{tableUuid}}. * - *

{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + *

{@code tableUuid} comes from the path variable. {@code databaseName} and {@code tableName} are * denormalized display columns carried in the body. */ @Data @@ -19,7 +19,7 @@ public class UpsertTableStatsRequest { /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e7493024c..6ac5db173 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -34,7 +34,7 @@ indexes = { @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), @Index(name = "idx_job_id", columnList = "job_id") }) @@ -53,10 +53,10 @@ public class TableOperationsHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -64,8 +64,8 @@ public class TableOperationsHistoryRow { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. 
*/ - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ @Enumerated(EnumType.STRING) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index e5493b510..43778495a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -53,10 +53,10 @@ public class TableOperationsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -85,11 +85,4 @@ public class TableOperationsRow { */ @Column(name = "version") private Long version; - - /** - * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer - * reads stats directly from {@code table_stats} instead of duplicating them here. 
- */ - @Column(name = "metrics") - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 85d97a5eb..b0d92fc81 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -5,8 +5,6 @@ import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -42,17 +40,16 @@ public class TableStatsHistoryRow { @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "id", nullable = false) - private Long id; + @Column(name = "id", nullable = false, length = 36) + private String id; @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 71d6a9421..f682a3485 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -37,10 +37,10 @@ public class TableStatsRow { @Column(name = 
"table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 098380e7f..49641efe2 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -3,22 +3,22 @@ CREATE TABLE IF NOT EXISTS table_operations ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, status VARCHAR(20) NOT NULL, created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), version BIGINT, - metrics TEXT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS table_stats ( ); CREATE TABLE IF NOT EXISTS table_stats_history ( - id BIGINT NOT NULL AUTO_INCREMENT, + id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), @@ -40,10 +40,10 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( CREATE TABLE IF NOT EXISTS table_operations_history ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, - submitted_at TIMESTAMP(6) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, From a109f0231d2edc546b4a1f630ad4e986c14ade02 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:16:45 -0700 Subject: [PATCH 12/81] fix(optimizer): propagate optimizer-0 renames into repos and tests - Repositories: update JPQL and parameter names to match the renamed entity fields (databaseName, completedAt). Change TableOperationsHistoryRepository and TableStatsHistoryRepository ID type parameter from Long to String to match the entity PK (UUID set by the caller, not auto-generated). - Tests: update builders and getters to use the renamed fields (databaseName, completedAt). Replace the autoIncrementId test with callerSetIdIsPreserved which verifies the caller-set UUID round-trips through save/findById. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationsHistoryRepository.java | 13 +++--- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 4 +- .../TableOperationsHistoryRepositoryTest.java | 20 +++++----- .../TableStatsHistoryRepositoryTest.java | 40 +++++++++++++++---- .../repository/TableStatsRepositoryTest.java | 18 ++++----- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 71ab1cde4..65d62818c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -12,14 +12,15 @@ import org.springframework.stereotype.Repository; /** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is auto-increment {@code id}. + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller + * (same UUID as the originating {@code table_operations.id}). */ @Repository public interface TableOperationsHistoryRepository - extends JpaRepository { + extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. 
*/ @Query( @@ -29,9 +30,9 @@ public interface TableOperationsHistoryRepository + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "AND (:until IS NULL OR r.submittedAt <= :until) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "AND (:until IS NULL OR r.completedAt <= :until) " + + "ORDER BY r.completedAt DESC") List<TableOperationsHistoryRow> find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 767d60c22..aaa1b0050 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -9,7 +9,7 @@ import org.springframework.data.repository.query.Param; /** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository<TableStatsHistoryRow, Long> { +public interface TableStatsHistoryRepository extends JpaRepository<TableStatsHistoryRow, String> { /** * Return history rows for a table, newest first. 
Pass {@code null} for {@code since} to skip the diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index ecae70feb..9bcaab41b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 1a35a8fda..b9735a617 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -36,7 +36,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t1) + .completedAt(t1) .status(OperationHistoryStatus.SUCCESS) .jobId("job-001") .build()); @@ -48,7 +48,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t2) + .completedAt(t2) .status(OperationHistoryStatus.FAILED) .jobId("job-002") .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) @@ -75,7 +75,7 @@ void appendIsNonDestructive_multipleRunsRetained() { .databaseName("db1") .tableName("tbl2") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -97,7 +97,7 @@ void find_respectsLimit() { .databaseName("db1") .tableName("tbl3") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -120,7 +120,7 @@ void find_noParams_returnsAll() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now) + .completedAt(now) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -130,7 +130,7 @@ void find_noParams_returnsAll() { .databaseName("db2") .tableName("tbl2") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(1)) + .completedAt(now.plusSeconds(1)) .status(OperationHistoryStatus.FAILED) .build()); @@ -154,7 +154,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(old) + .completedAt(old) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -164,7 +164,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(recent) + .completedAt(recent) .status(OperationHistoryStatus.FAILED) .build()); @@ -180,13 +180,13 @@ void find_byStatusAndTimeWindow() { null, PageRequest.of(0, 100)); assertThat(failed).hasSize(1); - assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + 
assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index a76c7155d..f3e72b52e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -84,28 +84,52 @@ void find_isolatesByTableUuid() { } @Test - void autoIncrementId() { + void callerSetIdIsPreserved() { String tableUuid = UUID.randomUUID().toString(); + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); Instant now = Instant.now(); - TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); - TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + TableStatsHistoryRow row1 = + repository.save(buildRow(id1, tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = + repository.save(buildRow(id2, tableUuid, "db1", "tbl1", 2L, 0L, now)); - assertThat(row1.getId()).isNotNull(); - assertThat(row2.getId()).isNotNull(); - assertThat(row2.getId()).isGreaterThan(row1.getId()); + assertThat(row1.getId()).isEqualTo(id1); + assertThat(row2.getId()).isEqualTo(id2); + assertThat(repository.findById(id1)).isPresent(); + assertThat(repository.findById(id2)).isPresent(); } private static TableStatsHistoryRow buildRow( String tableUuid, - String databaseId, + String databaseName, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return buildRow( + UUID.randomUUID().toString(), + tableUuid, + databaseName, + tableName, + numFilesAdded, + numFilesDeleted, + recordedAt); + } + + private static TableStatsHistoryRow 
buildRow( + String id, + String tableUuid, + String databaseName, String tableName, long numFilesAdded, long numFilesDeleted, Instant recordedAt) { return TableStatsHistoryRow.builder() + .id(id) .tableUuid(tableUuid) - .databaseId(databaseId) + .databaseName(databaseName) .tableName(tableName) .stats( TableStats.builder() diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index a8ac1cbbb..b62371f53 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -34,7 +34,7 @@ void saveAndFindById() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(stats) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) @@ -43,7 +43,7 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); - assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); @@ -56,7 +56,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -68,7 +68,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -87,7 +87,7 @@ void find_noParams_returnsAll() { repository.save( 
TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -98,7 +98,7 @@ void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -115,7 +115,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -126,7 +126,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -136,6 +136,6 @@ void find_byDatabase() { .build()); assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); } } From df01c262d3ebfd4e0fcdc3f003e1ebab3ba90220 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:22:50 -0700 Subject: [PATCH 13/81] fix(optimizer): propagate optimizer-0 renames into service + controller - Service impl: rename databaseId -> databaseName in builder calls and method signatures (listTableStats); rename submittedAt -> completedAt for the history-row build path. Generate a UUID for the TableStatsHistoryRow on insert now that id is no longer DB-allocated. - Service interface: rename listTableStats parameter databaseId -> databaseName. - TableStatsController: rename the databaseId query parameter to databaseName to match the service signature. - Service test: rename builder/getter usages and the timestamp assertion (getSubmittedAt -> getCompletedAt). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableStatsController.java | 4 ++-- .../optimizer/service/OptimizerDataService.java | 2 +- .../service/OptimizerDataServiceImpl.java | 17 ++++++++++------- .../service/OptimizerDataServiceImplTest.java | 10 +++++----- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index d469586a2..36e49055b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -49,10 +49,10 @@ public ResponseEntity getTableStats(@PathVariable String tableUui */ @GetMapping public ResponseEntity> listTableStats( - @RequestParam(required = false) String databaseId, + @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok(service.listTableStats(databaseId, tableName, tableUuid)); + return ResponseEntity.ok(service.listTableStats(databaseName, tableName, tableUuid)); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index ce3120400..dd2b2fd58 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -58,7 +58,7 @@ Optional completeOperation( * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to * skip that filter. 
No filters returns all rows. */ - List listTableStats(String databaseId, String tableName, String tableUuid); + List listTableStats(String databaseName, String tableName, String tableUuid); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 629853156..285cea914 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -20,6 +20,7 @@ import java.time.Instant; import java.util.List; import java.util.Optional; +import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.data.domain.PageRequest; @@ -67,7 +68,7 @@ public Optional completeOperation( .databaseName(row.getDatabaseName()) .tableName(row.getTableName()) .operationType(row.getOperationType()) - .submittedAt(Instant.now()) + .completedAt(Instant.now()) .status(request.getStatus()) .jobId(row.getJobId()) .result(request.getResult()) @@ -94,7 +95,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest existing -> existing .toBuilder() - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .tableProperties(request.getTableProperties()) @@ -103,7 +104,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .orElse( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .tableProperties(request.getTableProperties()) @@ -113,8 +114,9 @@ public TableStatsDto 
upsertTableStats(String tableUuid, UpsertTableStatsRequest statsHistoryRepository.save( TableStatsHistoryRow.builder() + .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .recordedAt(now) @@ -129,8 +131,9 @@ public Optional getTableStats(String tableUuid) { } @Override - public List listTableStats(String databaseId, String tableName, String tableUuid) { - return statsRepository.find(databaseId, tableName, tableUuid).stream() + public List listTableStats( + String databaseName, String tableName, String tableUuid) { + return statsRepository.find(databaseName, tableName, tableUuid).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -154,7 +157,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) .operationType(dto.getOperationType()) - .submittedAt(dto.getSubmittedAt() != null ? dto.getSubmittedAt() : Instant.now()) + .completedAt(dto.getCompletedAt() != null ? 
dto.getCompletedAt() : Instant.now()) .status(dto.getStatus()) .jobId(dto.getJobId()) .result(dto.getResult()) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 244acb204..10605c002 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -67,7 +67,7 @@ void completeOperation_writesHistoryFromOperationRow() { assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); - assertThat(result.get().getSubmittedAt()).isNotNull(); + assertThat(result.get().getCompletedAt()).isNotNull(); } @Test @@ -98,14 +98,14 @@ void upsertTableStats_createsNewRow() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(stats) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .build()); assertThat(dto.getTableUuid()).isEqualTo(tableUuid); - assertThat(dto.getDatabaseId()).isEqualTo("db1"); + assertThat(dto.getDatabaseName()).isEqualTo("db1"); assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); assertThat(statsRepository.findById(tableUuid)).isPresent(); @@ -128,7 +128,7 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(firstStats) .build()); @@ -136,7 +136,7 @@ void 
upsertTableStats_updatesExistingRow_andAppendsHistory() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(secondStats) .build()); From 027fccd61c362c1d9b3e2902583579b34d1907f7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 11:01:56 -0700 Subject: [PATCH 14/81] fix(optimizer): add databaseName + tableName to apps/optimizer history row Address PR #530 review feedback: the lightweight read-side TableOperationHistoryRow in the apps/optimizer shared module did not surface the denormalized database_name and table_name columns, even though the underlying schema carries them. Add them so analyst-style queries from the analyzer/scheduler side can read operation history without joining back to table_operations. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e638e2e1..4e3ace953 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -26,6 +26,12 @@ public class TableOperationHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + @Column(name = "operation_type", nullable = false, length = 50) private String operationType; From 79753f1da1ae63f84de9b127d1f7cac301a6666b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:03:10 -0700 Subject: [PATCH 
15/81] fix(optimizer): index table_operations_history on (database_name, table_name) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a composite secondary index on (database_name, table_name) to table_operations_history at the schema and entity layers. This backs a new name-based history-lookup endpoint added on optimizer-2; without the index, the query degrades to a full scan on a table that grows with every operation completion. The other three optimizer tables get no new indexes — no new query patterns on them this round. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/entity/TableOperationsHistoryRow.java | 3 ++- services/optimizer/src/main/resources/db/optimizer-schema.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6ac5db173..3b6ced892 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -36,7 +36,8 @@ @Index(name = "idx_op_type_hist", columnList = "operation_type"), @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id") + @Index(name = "idx_job_id", columnList = "job_id"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") }) @Getter @EqualsAndHashCode diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 49641efe2..4c2d9604b 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ 
b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,5 +47,6 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - PRIMARY KEY (id) + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name) ); From dceef974009ccc0c48cc5df274de4ca85bf74934 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:04:37 -0700 Subject: [PATCH 16/81] feat(optimizer): unify REST prefix to /v1/optimizer; add name-based history GET - Rename @RequestMapping prefix on the three optimizer controllers to share a /v1/optimizer/... namespace: /v1/table-operations -> /v1/optimizer/operations /v1/table-operations-history -> /v1/optimizer/operations-history /v1/table-stats -> /v1/optimizer/stats - Add TableByNameController hosting human/analyst-oriented name-keyed reads under /v1/optimizer/databases/{databaseName}/ tables/{tableName}. Today it carries one endpoint: GET .../operations-history (lists operation history by name). Other optimizer endpoints stay UUID-keyed because drop-and-recreate of a table produces a new optimizer identity (new stats, new storage, new operation history) and a name-only key would conflate two distinct identities. The new controller is structured for future expansion when more name-based use cases land. Backed by the composite index on table_operations_history (database_name, table_name) added on optimizer-0. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableByNameController.java | 35 +++++++++++++++++++ .../controller/TableOperationsController.java | 2 +- .../TableOperationsHistoryController.java | 2 +- .../api/controller/TableStatsController.java | 2 +- 4 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java new file mode 100644 index 000000000..f1989ef3e --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java @@ -0,0 +1,35 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * Name-keyed read endpoints for human/analyst convenience. UUID-keyed endpoints elsewhere remain + * the canonical path for machine callers, since drop-and-recreate of a table produces a new + * optimizer identity that a name-only lookup would conflate with the dropped table. 
+ */ +@RestController +@RequestMapping("/v1/optimizer/databases/{databaseName}/tables/{tableName}") +@RequiredArgsConstructor +public class TableByNameController { + + private final OptimizerDataService service; + + /** Operation history for a table by (database, table) name, newest first. */ + @GetMapping("/operations-history") + public ResponseEntity> getOperationsHistoryByName( + @PathVariable String databaseName, + @PathVariable String tableName, + @RequestParam(defaultValue = "100") int limit) { + return ResponseEntity.ok( + service.listHistory(databaseName, tableName, null, null, null, null, null, limit)); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index d8ba13b11..a0cab4b7c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -20,7 +20,7 @@ /** REST controller for {@code table_operations}. */ @RestController -@RequestMapping("/v1/table-operations") +@RequestMapping("/v1/optimizer/operations") @RequiredArgsConstructor public class TableOperationsController { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 11c77a15d..ff4c4a77d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -19,7 +19,7 @@ /** REST controller for {@code table_operations_history}. 
*/ @RestController -@RequestMapping("/v1/table-operations-history") +@RequestMapping("/v1/optimizer/operations-history") @RequiredArgsConstructor public class TableOperationsHistoryController { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 36e49055b..4e8624481 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -18,7 +18,7 @@ /** REST controller for managing per-table stats in the optimizer DB. */ @RestController -@RequestMapping("/v1/table-stats") +@RequestMapping("/v1/optimizer/stats") @RequiredArgsConstructor public class TableStatsController { From bf04488d2ee0f14c0c41095b513c8551333c151d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:10:50 -0700 Subject: [PATCH 17/81] fix(optimizer): align apps/optimizer entities with services schema The apps/optimizer shared module was created in this PR with field names and column lengths that did not match the schema established in optimizer-0: - TableStatsRow.databaseId -> databaseName - TableOperationHistoryRow.submittedAt -> completedAt - database_name / table_name VARCHAR(255) -> VARCHAR(128) Repos updated to match (TableStatsRepository param, TableOperationHistoryRepository ORDER BY column). No services/optimizer or schema SQL change needed - those already used the correct names. This change was previously folded into a later commit on optimizer-3; moving it down to the PR that owns these files. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 4 ++-- .../openhouse/optimizer/entity/TableOperationRow.java | 4 ++-- .../linkedin/openhouse/optimizer/entity/TableStatsRow.java | 6 +++--- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/repository/TableStatsRepository.java | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e3ace953..d15eb6785 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -35,8 +35,8 @@ public class TableOperationHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private String operationType; - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; @Column(name = "status", nullable = false, length = 20) private String status; diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java index fc0104604..33a83bd3f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -28,10 +28,10 @@ public class TableOperationRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - 
@Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Column(name = "operation_type", nullable = false, length = 50) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 5cdf16a97..bc647d86e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -34,10 +34,10 @@ public class TableStatsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index f2ea9e3c8..fd9edd1f4 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -13,7 +13,7 @@ public interface TableOperationHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. 
*/ @Query( @@ -21,8 +21,8 @@ public interface TableOperationHistoryRepository + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "ORDER BY r.completedAt DESC") List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6effe19c2..50f515d07 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } From 62f426a0a236f074c0db4c478b10e6e7b7949318 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:20:11 -0700 Subject: [PATCH 18/81] feat(optimizer): add findLatestPerTable to history repo The Analyzer evaluates cadence using only the most-recent history row per (table_uuid, operation_type); pulling the full history scan per analyzer pass is wasted I/O. Add a dedicated query that returns at most one row per (table_uuid, operation_type), restricted to a single operation type. The query uses a correlated MAX subquery for portability across MySQL and H2. For large history volume, a (operation_type, table_uuid, completed_at) index on the schema would make the subquery index-only; TODO noted in javadoc. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationHistoryRepository.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index fd9edd1f4..09930ab08 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -29,4 +29,24 @@ List find( @Param("status") String status, @Param("since") Instant since, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the Analyzer to evaluate cadence without materializing every + * historical row. + * + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code + * table_operations_history} table this benefits from an index on {@code (operation_type, + * table_uuid, completed_at)} — TODO add it to the schema. + * + *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. + */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } From 3483b25f394e44b03c5bf94a22c1d644193466ba Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 08:55:28 -0700 Subject: [PATCH 19/81] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) on table_operations_history. TableOperationHistoryRepository.findLatestPerTable uses a correlated MAX(completed_at) subquery; without this index it degenerates to O(N²) and does not complete at 1M-row history scale. With it the inner subquery becomes an index-only lookup per outer row. Update the repo method's javadoc to point at the new index by name and drop the resolved TODO. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index 09930ab08..26166271f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -35,9 +35,9 @@ List find( * single operation type. Used by the Analyzer to evaluate cadence without materializing every * historical row. * - *

The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code - * table_operations_history} table this benefits from an index on {@code (operation_type, - * table_uuid, completed_at)} — TODO add it to the schema. + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. * *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all * tied rows; callers should dedupe in memory. diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From 02930094479750d10f25745849e9d511f5aa0aea Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:46:45 -0700 Subject: [PATCH 20/81] feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository Enables per-database iteration in the analyzer. Returns the bounded set of database_name values present in table_stats; the analyzer uses it to drive the outer loop when no specific databaseName filter is supplied. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/repository/TableStatsRepository.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 50f515d07..4215237bc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). + */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } From 6fa885db6bad708d54ace4ab61faa290ea208220 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:50:45 -0700 Subject: [PATCH 21/81] refactor(optimizer): Optional for optional filter params in service layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OptimizerDataService's filter-style methods (listTableOperations, listTableStats, getStatsHistory, listHistory) accepted nullable strings/ enums to mean "no filter". Switch to Optional at the service boundary; controllers wrap their nullable @RequestParam values via Optional.ofNullable. The implementation unwraps via .orElse(null) at the JPA repo call site — the @Query "IS NULL OR ..." pattern is idiomatic with nullable parameters and stays unchanged. No behavior change. No tests required updating. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableByNameController.java | 11 +++- .../controller/TableOperationsController.java | 8 ++- .../TableOperationsHistoryController.java | 10 +++- .../api/controller/TableStatsController.java | 9 ++- .../service/OptimizerDataService.java | 42 +++++++------- .../service/OptimizerDataServiceImpl.java | 57 +++++++++++-------- 6 files changed, 88 insertions(+), 49 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java index f1989ef3e..e3582ff7e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -30,6 +31,14 @@ public ResponseEntity> getOperationsHistoryByNam @PathVariable String tableName, @RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok( - service.listHistory(databaseName, tableName, null, null, null, null, null, limit)); + service.listHistory( + Optional.of(databaseName), + Optional.of(tableName), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 
a0cab4b7c..adc4d7a85 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,6 +7,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -61,6 +62,11 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { return ResponseEntity.ok( - service.listTableOperations(operationType, status, databaseName, tableName, tableUuid)); + service.listTableOperations( + Optional.ofNullable(operationType), + Optional.ofNullable(status), + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid))); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index ff4c4a77d..79fce5b8f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -55,6 +56,13 @@ public ResponseEntity> listHistory( 
@RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok( service.listHistory( - databaseName, tableName, tableUuid, operationType, status, since, until, limit)); + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid), + Optional.ofNullable(operationType), + Optional.ofNullable(status), + Optional.ofNullable(since), + Optional.ofNullable(until), + limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 4e8624481..ef57598e8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -52,7 +53,11 @@ public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok(service.listTableStats(databaseName, tableName, tableUuid)); + return ResponseEntity.ok( + service.listTableStats( + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid))); } /** @@ -64,6 +69,6 @@ public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getStatsHistory(tableUuid, since, limit)); + return 
ResponseEntity.ok(service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index dd2b2fd58..1c17d7a38 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -19,15 +19,15 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@code null} to - * skip that filter. No filters returns all rows. + * List operations matching the given filters. Every parameter is optional — pass {@link + * Optional#empty()} to skip that filter. No filters returns all rows. */ List listTableOperations( - OperationType operationType, - OperationStatus status, - String databaseName, - String tableName, - String tableUuid); + Optional operationType, + Optional status, + Optional databaseName, + Optional tableName, + Optional tableUuid); /** * Complete an operation by writing a history entry. Looks up the operation row by {@code id}, @@ -55,19 +55,20 @@ Optional completeOperation( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to - * skip that filter. No filters returns all rows. + * List stats rows matching the given filters. Every parameter is optional — pass {@link + * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableStats(String databaseName, String tableName, String tableUuid); + List listTableStats( + Optional databaseName, Optional tableName, Optional tableUuid); /** * Return per-commit stats history for {@code tableUuid}, newest first. 
* * @param tableUuid the stable table UUID - * @param since if non-null, only return rows recorded at or after this instant + * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Instant since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); // --- TableOperationsHistory --- @@ -84,15 +85,16 @@ Optional completeOperation( /** * List history rows matching the given filters, ordered newest first. Every parameter is optional - * — pass {@code null} to skip that filter. No filters returns all rows up to {@code limit}. + * — pass {@link Optional#empty()} to skip that filter. No filters returns all rows up to {@code + * limit}. */ List listHistory( - String databaseName, - String tableName, - String tableUuid, - OperationType operationType, - OperationHistoryStatus status, - Instant since, - Instant until, + Optional databaseName, + Optional tableName, + Optional tableUuid, + Optional operationType, + Optional status, + Optional since, + Optional until, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 285cea914..de4faa465 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -42,12 +42,18 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { @Override public List listTableOperations( - OperationType operationType, - OperationStatus status, - String databaseName, - String tableName, - String tableUuid) { - return operationsRepository.find(operationType, status, databaseName, tableName, tableUuid) + Optional operationType, + Optional status, 
+ Optional databaseName, + Optional tableName, + Optional tableUuid) { + return operationsRepository + .find( + operationType.orElse(null), + status.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + tableUuid.orElse(null)) .stream() .map(mapper::toDto) .collect(Collectors.toList()); @@ -132,15 +138,18 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - String databaseName, String tableName, String tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid).stream() + Optional databaseName, Optional tableName, Optional tableUuid) { + return statsRepository + .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @Override - public List getStatsHistory(String tableUuid, Instant since, int limit) { - return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream() + public List getStatsHistory( + String tableUuid, Optional since, int limit) { + return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) + .stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -175,23 +184,23 @@ public List getHistory(String tableUuid, int limit) { @Override public List listHistory( - String databaseName, - String tableName, - String tableUuid, - OperationType operationType, - OperationHistoryStatus status, - Instant since, - Instant until, + Optional databaseName, + Optional tableName, + Optional tableUuid, + Optional operationType, + Optional status, + Optional since, + Optional until, int limit) { return historyRepository .find( - databaseName, - tableName, - tableUuid, - operationType, - status, - since, - until, + databaseName.orElse(null), + tableName.orElse(null), + tableUuid.orElse(null), + operationType.orElse(null), + status.orElse(null), + since.orElse(null), + until.orElse(null), PageRequest.of(0, limit)) .stream() .map(mapper::toDto) From 
eba1392e44d9170a3f0a484a7d631c1683cccf91 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:01:41 -0700 Subject: [PATCH 22/81] feat(optimizer): promote internal model types to shared apps/optimizer Move Table, TableOperation, OperationType, OperationStatus, HistoryStatus from the analyzer-internal package into the shared apps/optimizer module. The scheduler will consume the same domain types as the analyzer. Per-layer types still hold (wire-API, internal model, DB each define their own representation); this just consolidates the internal layer so multiple internal consumers (analyzer, scheduler) share one set of classes. TableOperation gains a nullable, non-persisted fileCount field. Consumers that need it (OFD bin-packing) populate it at read time from table_stats; the DB row does not carry it. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ 
b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d1390ee79 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationRow toRow() { + return TableOperationRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From e57659391cc238cc4609682af943843502d8b9b8 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:51:44 -0700 Subject: [PATCH 23/81] refactor(optimizer): rename apps/optimizer entities + repos to plural; add TableStatsHistory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns apps/optimizer with the SQL table names (table_operations, table_operations_history) and the existing services/optimizer convention: - TableOperationRow → TableOperationsRow - TableOperationHistoryRow → TableOperationsHistoryRow - TableOperationHistoryRepository → TableOperationsHistoryRepository Adds the missing TableStatsHistoryRow + TableStatsHistoryRepository so apps/optimizer is a complete entity set covering all four optimizer DB tables. services/optimizer will consume these in a follow-up commit on optimizer-2 (the services-side duplicates will be deleted). 
Adds an explanatory javadoc on TableOperationsRow.version documenting the application-level optimistic-concurrency-control role used by the scheduler's CAS transitions (resolves PR #530 thread 3231557313). Co-Authored-By: Claude Opus 4.7 (1M context) --- ...ow.java => TableOperationsHistoryRow.java} | 2 +- ...rationRow.java => TableOperationsRow.java} | 10 ++- .../entity/TableStatsHistoryRow.java | 61 +++++++++++++++++++ .../optimizer/model/TableOperation.java | 10 +-- ... => TableOperationsHistoryRepository.java} | 16 ++--- .../repository/TableOperationsRepository.java | 8 +-- .../TableStatsHistoryRepository.java | 29 +++++++++ 7 files changed, 116 insertions(+), 20 deletions(-) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationHistoryRow.java => TableOperationsHistoryRow.java} (96%) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationRow.java => TableOperationsRow.java} (71%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/{TableOperationHistoryRepository.java => TableOperationsHistoryRepository.java} (79%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java similarity index 96% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index d15eb6785..e5ff2bd01 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ 
b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationHistoryRow { +public class TableOperationsHistoryRow { @Id @Column(name = "id", nullable = false, length = 36) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java similarity index 71% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 33a83bd3f..0e23761ae 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -19,7 +19,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationRow { +public class TableOperationsRow { @Id @Column(name = "id", nullable = false, length = 36) @@ -49,7 +49,13 @@ public class TableOperationRow { @Column(name = "job_id", length = 255) private String jobId; - /** Plain version column — not managed by JPA optimistic locking. */ + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
+ */ @Column(name = "version") private Long version; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..6f41881d6 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,61 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d1390ee79..d49625a57 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,7 +11,7 @@ /** * 
An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA * row via {@link #toRow()}. * @@ -56,7 +56,7 @@ public class TableOperation { private Long fileCount; /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationRow row) { + public static TableOperation from(TableOperationsRow row) { return TableOperation.builder() .id(row.getId()) .tableUuid(row.getTableUuid()) @@ -83,8 +83,8 @@ public static TableOperation pending(Table table, OperationType operationType) { } /** Convert to a JPA entity for persistence. */ - public TableOperationRow toRow() { - return TableOperationRow.builder() + public TableOperationsRow toRow() { + return TableOperationsRow.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java similarity index 79% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 26166271f..f8fe90b0c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import 
com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; @@ -9,21 +9,21 @@ import org.springframework.data.repository.query.Param; /** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationHistoryRepository - extends JpaRepository { +public interface TableOperationsHistoryRepository + extends JpaRepository { /** * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:since IS NULL OR r.completedAt >= :since) " + "ORDER BY r.completedAt DESC") - List find( + List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, @Param("status") String status, @@ -43,10 +43,10 @@ List find( * tied rows; callers should dedupe in memory. 
*/ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE r.operationType = :operationType " + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 27424dfdc..c7a08cabc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,26 +1,26 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; /** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { +public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} * to skip that filter. 
*/ @Query( - "SELECT r FROM TableOperationRow r " + "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + List find( @Param("operationType") String operationType, @Param("status") String status, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..aaa1b0050 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} From 6f98e1ad65aba584d4d87e9df9ee0a911b451261 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 18:07:09 -0700 Subject: [PATCH 24/81] refactor(optimizer): consolidate entities/repos into apps/optimizer; rename wire HistoryStatus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit services/optimizer no longer maintains its own JPA entities or Spring Data repositories for the four optimizer DB tables. Apps/optimizer is the single source of truth; services/optimizer depends on apps/optimizer (renamed via project(':apps:optimizer').name = 'optimizer-data' to dodge the project.name collision that previously caused a self-referential dependency error). Removed from services/optimizer: - entity/{TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow}.java - repository/{TableOperationsRepository, TableOperationsHistoryRepository, TableStatsRepository, TableStatsHistoryRepository}.java - api/model/TableStats.java (duplicate) - api/model/OperationHistoryStatus.java (renamed → HistoryStatus to match the internal enum naming) - config/JobResultConverter.java (no longer needed — entity stores result as raw String JSON and the mapper converts at the wire boundary) Added on apps-side: - TableStatsHistoryRow + TableStatsHistoryRepository (previously only on services-side) - jobId + result fields on TableOperationsHistoryRow so it covers all services-side use cases - find(...) 
on TableOperationsHistoryRepository extended to the 8-filter service-layer shape (databaseName, tableName, tableUuid, operationType, status, since, until, pageable) - toBuilder = true on TableStatsRow so OptimizerDataServiceImpl.upsertTableStats can use the existing.toBuilder() pattern Mapper updates: - OptimizerMapper gains String ↔ wire-enum helpers and a JSON ↔ JobResult pair (replaces the old JPA AttributeConverter approach). - OptimizerDataServiceImpl unwraps Optional filters via .name() before calling the now-shared apps-side repos. Tests updated to match: entity-builder calls pass enum.name() Strings; repo.find(...) args reordered to apps-side (operationType, status, tableUuid, databaseName, tableName); JobResult.builder() in test fixtures replaced with literal JSON strings to match the String-typed result column. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../entity/TableOperationsHistoryRow.java | 6 ++ .../optimizer/entity/TableStatsRow.java | 2 +- .../TableOperationsHistoryRepository.java | 10 ++- services/optimizer/build.gradle | 1 + .../TableOperationsHistoryController.java | 4 +- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 50 ----------- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 -------- .../entity/TableOperationsHistoryRow.java | 84 ------------------ .../optimizer/entity/TableOperationsRow.java | 88 ------------------- .../entity/TableStatsHistoryRow.java | 61 ------------- .../optimizer/entity/TableStatsRow.java | 57 ------------ .../TableOperationsHistoryRepository.java | 45 ---------- .../repository/TableOperationsRepository.java | 33 ------- 
.../TableStatsHistoryRepository.java | 29 ------ .../repository/TableStatsRepository.java | 25 ------ .../service/OptimizerDataService.java | 4 +- .../service/OptimizerDataServiceImpl.java | 26 +++--- .../TableOperationsHistoryRepositoryTest.java | 41 +++++---- .../TableOperationsRepositoryTest.java | 44 +++++----- .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 2 +- .../service/OptimizerDataServiceImplTest.java | 14 +-- settings.gradle | 1 + 30 files changed, 151 insertions(+), 586 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java diff --git 
a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e5ff2bd01..09eb7fc21 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -40,4 +40,10 @@ public class TableOperationsHistoryRow { @Column(name = "status", nullable = false, length = 20) private String status; + + @Column(name = "job_id", length = 255) + private String jobId; + + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..b1fad275c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -25,7 +25,7 @@ @Table(name = "table_stats") @Getter @Setter -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableStatsRow { diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index f8fe90b0c..61e0316e5 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,16 +18,22 @@ public interface TableOperationsHistoryRepository */ @Query( "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:operationType IS NULL OR 
r.operationType = :operationType) " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:since IS NULL OR r.completedAt >= :since) " + + "AND (:until IS NULL OR r.completedAt < :until) " + "ORDER BY r.completedAt DESC") List find( - @Param("operationType") String operationType, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, + @Param("operationType") String operationType, @Param("status") String status, @Param("since") Instant since, + @Param("until") Instant until, Pageable pageable); /** diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..31051b65c 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,6 +4,7 @@ plugins { } dependencies { + implementation project(':apps:optimizer-data') implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 79fce5b8f..b14156d5b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import 
com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; @@ -50,7 +50,7 @@ public ResponseEntity> listHistory( @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid, @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) OperationHistoryStatus status, + @RequestParam(required = false) HistoryStatus status, @RequestParam(required = false) Instant since, @RequestParam(required = false) Instant until, @RequestParam(defaultValue = "100") int limit) { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..db9acc27e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between 
optimizer JPA entities and their corresponding DTOs. * *

<p>The implementation is generated by MapStruct at compile time and registered as a Spring bean. Inject via {@code @Autowired} or constructor injection. + * + *

Type-conversion helpers below bridge the entity's raw String/JSON shape (apps/optimizer + * entities use Strings at the JPA boundary for portability) and the wire DTO's typed enums and + * nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java deleted file mode 100644 index 64c99061a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Combined stats payload stored as a single JSON blob per table. */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot — used for bin-packing. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 3b6ced892..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Convert; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import 
lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Enumerated(EnumType.STRING) - @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. 
*/ - @Enumerated(EnumType.STRING) - @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; - - /** Spark job ID; indexed for job → result lookups. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 43778495a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,88 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Enumerated(EnumType.STRING) - @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; - - @Enumerated(EnumType.STRING) - @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index b0d92fc81..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index f682a3485..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; 
-import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. 
*/ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java deleted file mode 100644 index 65d62818c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; - -/** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller - * (same UUID as the originating {@code table_operations.id}). - */ -@Repository -public interface TableOperationsHistoryRepository - extends JpaRepository { - - /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. 
- */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "AND (:until IS NULL OR r.completedAt <= :until) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid, - @Param("operationType") OperationType operationType, - @Param("status") OperationHistoryStatus status, - @Param("since") Instant since, - @Param("until") Instant until, - Pageable pageable); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java deleted file mode 100644 index 891322134..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; - -/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. 
*/ -@Repository -public interface TableOperationsRepository extends JpaRepository { - - /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. - */ - @Query( - "SELECT r FROM TableOperationsRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("operationType") OperationType operationType, - @Param("status") OperationStatus status, - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java deleted file mode 100644 index aaa1b0050..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { - - /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND (:since IS NULL OR r.recordedAt >= :since) " - + "ORDER BY r.recordedAt DESC") - List find( - @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java deleted file mode 100644 index 9bcaab41b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ -public interface TableStatsRepository extends JpaRepository { - - /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. 
- */ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 1c17d7a38..2909b8b5a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.service; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -93,7 +93,7 @@ List listHistory( Optional tableName, Optional tableUuid, Optional operationType, - Optional status, + Optional status, Optional since, Optional until, int limit); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index de4faa465..0c9af5107 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,7 
+2,7 @@ import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -49,11 +49,11 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.orElse(null), - status.orElse(null), + operationType.map(OperationType::name).orElse(null), + status.map(OperationStatus::name).orElse(null), + tableUuid.orElse(null), databaseName.orElse(null), - tableName.orElse(null), - tableUuid.orElse(null)) + tableName.orElse(null)) .stream() .map(mapper::toDto) .collect(Collectors.toList()); @@ -75,9 +75,9 @@ public Optional completeOperation( .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status(request.getStatus()) + .status(request.getStatus().name()) .jobId(row.getJobId()) - .result(request.getResult()) + .result(mapper.fromJobResult(request.getResult())) .build(); return mapper.toDto(historyRepository.save(historyRow)); }); @@ -165,11 +165,11 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .tableUuid(dto.getTableUuid()) .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) - .operationType(dto.getOperationType()) + .operationType(dto.getOperationType() != null ? dto.getOperationType().name() : null) .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dto.getStatus()) + .status(dto.getStatus() != null ? 
dto.getStatus().name() : null) .jobId(dto.getJobId()) - .result(dto.getResult()) + .result(mapper.fromJobResult(dto.getResult())) .build(); return mapper.toDto(historyRepository.save(row)); } @@ -188,7 +188,7 @@ public List listHistory( Optional tableName, Optional tableUuid, Optional operationType, - Optional status, + Optional status, Optional since, Optional until, int limit) { @@ -197,8 +197,8 @@ public List listHistory( databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null), - operationType.orElse(null), - status.orElse(null), + operationType.map(OperationType::name).orElse(null), + status.map(HistoryStatus::name).orElse(null), since.orElse(null), until.orElse(null), PageRequest.of(0, limit)) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index b9735a617..7f0879d83 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,8 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; @@ -35,9 +34,9 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t1) - .status(OperationHistoryStatus.SUCCESS) + 
.status(HistoryStatus.SUCCESS.name()) .jobId("job-001") .build()); @@ -47,11 +46,11 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t2) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .jobId("job-002") - .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") .build()); List rows = @@ -74,9 +73,9 @@ void appendIsNonDestructive_multipleRunsRetained() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } @@ -96,9 +95,9 @@ void find_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } @@ -119,9 +118,9 @@ void find_noParams_returnsAll() { .tableUuid(uuid1) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -129,16 +128,16 @@ void find_noParams_returnsAll() { .tableUuid(uuid2) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(1)) - 
.status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .build()); List rows = repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first - assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + assertThat(rows.get(0).getStatus()).isEqualTo(HistoryStatus.FAILED.name()); } @Test @@ -153,9 +152,9 @@ void find_byStatusAndTimeWindow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(old) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -163,9 +162,9 @@ void find_byStatusAndTimeWindow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(recent) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .build()); // Filter by status @@ -175,7 +174,7 @@ void find_byStatusAndTimeWindow() { null, null, null, - OperationHistoryStatus.FAILED, + HistoryStatus.FAILED.name(), null, null, PageRequest.of(0, 100)); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index b1342b12d..2ca8dc61e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); List pending = - 
repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING.name(), null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -123,13 +123,13 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); - assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); + assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); + assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); } } diff --git 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index f3e72b52e..475196630 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index b62371f53..240d512ef 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 10605c002..17ab55278 100644 --- 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -3,16 +3,16 @@ import static org.assertj.core.api.Assertions.assertThat; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -50,8 +50,8 @@ void completeOperation_writesHistoryFromOperationRow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .scheduledAt(Instant.now()) .jobId("spark-job-123") @@ -59,10 +59,10 @@ void completeOperation_writesHistoryFromOperationRow() { Optional result = service.completeOperation( - id, 
CompleteOperationRequest.builder().status(OperationHistoryStatus.SUCCESS).build()); + id, CompleteOperationRequest.builder().status(HistoryStatus.SUCCESS).build()); assertThat(result).isPresent(); - assertThat(result.get().getStatus()).isEqualTo(OperationHistoryStatus.SUCCESS); + assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); @@ -76,7 +76,7 @@ void completeOperation_notFound_returnsEmpty() { service.completeOperation( UUID.randomUUID().toString(), CompleteOperationRequest.builder() - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED) .result( JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) .build()); diff --git a/settings.gradle b/settings.gradle index 0d64dad53..c5544a193 100644 --- a/settings.gradle +++ b/settings.gradle @@ -51,6 +51,7 @@ include ':services:housetables' include ':services:jobs' include ':services:optimizer' include ':apps:optimizer' +project(':apps:optimizer').name = 'optimizer-data' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From d90c26fcb36952e94d29e53a12049afc37be395b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:05:07 -0700 Subject: [PATCH 25/81] refactor(optimizer): move apps/optimizer module into services/optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apps/optimizer was a misplaced shared library duplicating the JPA layer. services/optimizer is the canonical optimizer module — schedulers and analyzers embed it directly as a library. 
This commit consolidates by moving the JPA entities, repositories, and in-memory domain model from apps/optimizer into services/optimizer, deleting the apps module, and updating the service-side wiring + tests accordingly. - git mv 13 files (entities/repos/model) from apps/optimizer to services/optimizer; preserves history. - Delete services-side pre-R7 duplicates: 4 entities, 4 repos, the duplicate api/model/TableStats DTO, the now-unneeded JobResultConverter. - Rename services-side wire-API enum OperationHistoryStatus → HistoryStatus. - Drop the apps/optimizer module entry from settings.gradle. - OptimizerMapper: add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, String↔JobResult default helpers so MapStruct can bridge the entity (String at JPA boundary) and the wire DTOs. - Update DTOs that import TableStats/HistoryStatus to the new package locations. - Rewrite repo tests against the simplified history repo shape and fix a stale find(...) positional-arg signature in the operations repo test. 
--- apps/optimizer/build.gradle | 13 -- .../entity/TableOperationsHistoryRow.java | 43 ------ .../optimizer/entity/TableOperationsRow.java | 61 -------- .../entity/TableStatsHistoryRow.java | 61 -------- .../optimizer/entity/TableStatsRow.java | 53 ------- .../TableOperationsHistoryRepository.java | 52 ------- .../repository/TableOperationsRepository.java | 29 ---- .../TableStatsHistoryRepository.java | 29 ---- .../repository/TableStatsRepository.java | 33 ----- .../optimizer/api/mapper/OptimizerMapper.java | 59 ++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 50 ------- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------ .../entity/TableOperationsHistoryRow.java | 53 ++----- .../optimizer/entity/TableOperationsRow.java | 57 ++------ .../entity/TableStatsHistoryRow.java | 4 +- .../optimizer/entity/TableStatsRow.java | 20 ++- .../optimizer/model/HistoryStatus.java | 0 .../optimizer/model/OperationStatus.java | 0 .../optimizer/model/OperationType.java | 0 .../openhouse/optimizer/model/Table.java | 0 .../optimizer/model/TableOperation.java | 0 .../openhouse/optimizer/model/TableStats.java | 0 .../TableOperationsHistoryRepository.java | 51 +++---- .../repository/TableOperationsRepository.java | 20 ++- .../repository/TableStatsRepository.java | 12 +- .../TableOperationsHistoryRepositoryTest.java | 131 +++++------------- .../TableOperationsRepositoryTest.java | 44 +++--- .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 2 +- settings.gradle | 1 - 36 files changed, 200 insertions(+), 728 deletions(-) delete mode 100644 apps/optimizer/build.gradle delete mode 100644 
apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java (100%) diff --git 
a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle deleted file mode 100644 index f14969274..000000000 --- a/apps/optimizer/build.gradle +++ /dev/null @@ -1,13 +0,0 @@ -plugins { - id 'openhouse.java-minimal-conventions' -} - -// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). -buildDir = "${rootProject.buildDir}/apps-optimizer" - -dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' - testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' - testRuntimeOnly 'com.h2database:h2' -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index e5ff2bd01..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,43 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ -@Entity -@Table(name = "table_operations_history") -@Getter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - @Column(name = "status", nullable = false, length = 20) - private String status; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 0e23761ae..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ -@Entity -@Table(name = "table_operations") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - @Column(name = "created_at") - private Instant createdAt; - - @Column(name = "scheduled_at") - private Instant scheduledAt; - - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this - * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't - * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain - * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
- */ - @Column(name = "version") - private Long version; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6f41881d6..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index bc647d86e..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java deleted file mode 100644 index f8fe90b0c..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import 
org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationsHistoryRepository - extends JpaRepository { - - /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("operationType") String operationType, - @Param("tableUuid") String tableUuid, - @Param("status") String status, - @Param("since") Instant since, - Pageable pageable); - - /** - * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a - * single operation type. Used by the Analyzer to evaluate cadence without materializing every - * historical row. - * - *

<p>The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code - * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code - * table_operations_history}, the subquery becomes an index-only lookup per outer row. - * - * <p>
Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all - * tied rows; callers should dedupe in memory. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE r.operationType = :operationType " - + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " - + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java deleted file mode 100644 index c7a08cabc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { - - /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. 
- */ - @Query( - "SELECT r FROM TableOperationsRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( - @Param("operationType") String operationType, - @Param("status") String status, - @Param("tableUuid") String tableUuid, - @Param("databaseName") String databaseName, - @Param("tableName") String tableName); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java deleted file mode 100644 index aaa1b0050..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { - - /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND (:since IS NULL OR r.recordedAt >= :since) " - + "ORDER BY r.recordedAt DESC") - List find( - @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java deleted file mode 100644 index 4215237bc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ -public interface TableStatsRepository extends JpaRepository { - - /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. - */ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); - - /** - * Return the distinct {@code database_name} values present in {@code table_stats}. 
Used by the - * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the - * number of databases (small even at million-table scale). - */ - @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") - List findDistinctDatabaseNames(); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..ddf33a30f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,15 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

<p>Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + * <p>
Type-conversion helpers bridge the entity's raw String/JSON shape (the entities use Strings at + * the JPA boundary for portability) and the wire DTO's typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +40,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java deleted file mode 100644 index 64c99061a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Combined stats payload stored as a single JSON blob per table. */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot — used for bin-packing. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..09eb7fc21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,52 +1,24 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import 
lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - */ +/** JPA entity mapping to the {@code table_operations_history} table in the optimizer DB. */ @Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) +@Table(name = "table_operations_history") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsHistoryRow { - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; @@ -60,25 +32,18 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; - /** {@code SUCCESS} or {@code FAILED}. 
*/ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; - /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..0e23761ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,55 +1,30 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - */ +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. */ @Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) +@Table(name = "table_operations") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsRow { - /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. 
*/ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; @@ -59,29 +34,27 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) + @Column(name = "created_at") private Instant createdAt; - /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ @Column(name = "scheduled_at") private Instant scheduledAt; - /** Job ID returned by the Jobs Service after successful submission. */ @Column(name = "job_id", length = 255) private String jobId; /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
*/ @Column(name = "version") private Long version; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6f41881d6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..bc647d86e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; @@ -8,29 +8,26 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. + * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableStatsRow { @Id @@ -51,7 +48,6 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java 
similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 65d62818c..ba2ce35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,45 +1,40 @@ package 
com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller - * (same UUID as the originating {@code table_operations.id}). - */ -@Repository +/** Spring Data JPA repository for {@code table_operations_history} rows in the optimizer DB. */ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. + * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer + * {@code getHistory} endpoint. + */ + List findByTableUuidOrderByCompletedAtDesc( + String tableUuid, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the analyzer to evaluate cadence without materializing every + * historical row. + * + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. + * + *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. */ @Query( "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "AND (:until IS NULL OR r.completedAt <= :until) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid, - @Param("operationType") OperationType operationType, - @Param("status") OperationHistoryStatus status, - @Param("since") Instant since, - @Param("until") Instant until, - Pageable pageable); + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 891322134..c7a08cabc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,33 +1,29 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import 
com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. */ -@Repository +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + + "AND (:tableName IS NULL OR r.tableName = :tableName)") List find( - @Param("operationType") OperationType operationType, - @Param("status") OperationStatus status, + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + @Param("tableName") String tableName); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 9bcaab41b..4215237bc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -6,12 +6,12 @@ import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ public interface TableStatsRepository extends JpaRepository { /** * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableStatsRow r " @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). 
+ */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index b9735a617..436d08066 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,8 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; @@ -24,7 +23,7 @@ class TableOperationsHistoryRepositoryTest { @Autowired TableOperationsHistoryRepository repository; @Test - void appendAndFindByTableUuid() { + void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -35,9 +34,9 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t1) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .jobId("job-001") .build()); @@ -47,46 +46,23 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t2) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .jobId("job-002") - .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") .build()); List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); + repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - // Newest first assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); } @Test - void appendIsNonDestructive_multipleRunsRetained() { - Instant now = Instant.now(); - String tableUuid = UUID.randomUUID().toString(); - for (int i = 0; i < 3; i++) { - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) - .build()); - } - - List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); - assertThat(rows).hasSize(3); - } - - @Test - void find_respectsLimit() { + void findByTableUuid_respectsLimit() { Instant now = Instant.now(); String tableUuid = UUID.randomUUID().toString(); for (int i = 0; i < 5; i++) { @@ -96,97 +72,62 @@ void find_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); + 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void find_noParams_returnsAll() { - Instant now = Instant.now(); - String uuid1 = UUID.randomUUID().toString(); - String uuid2 = UUID.randomUUID().toString(); + void findLatestPerTable_returnsOneRowPerTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-02-01T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + String otherUuid = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(uuid1) + .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now) - .status(OperationHistoryStatus.SUCCESS) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(uuid2) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(1)) - .status(OperationHistoryStatus.FAILED) - .build()); - - List rows = - repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); - assertThat(rows).hasSize(2); - // Newest first - assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); - } - - @Test - void find_byStatusAndTimeWindow() { - Instant old = Instant.parse("2024-01-01T00:00:00Z"); - Instant recent = Instant.parse("2024-06-01T00:00:00Z"); - String tableUuid = UUID.randomUUID().toString(); - repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(old) - .status(OperationHistoryStatus.SUCCESS) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t2) + .status(HistoryStatus.FAILED.name()) .build()); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) + .tableUuid(otherUuid) .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(recent) - .status(OperationHistoryStatus.FAILED) + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - // Filter by status - List failed = - repository.find( - null, - null, - null, - null, - OperationHistoryStatus.FAILED, - null, - null, - PageRequest.of(0, 100)); - assertThat(failed).hasSize(1); - assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); + List latest = + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); - // Filter by time window - Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); - List afterCutoff = - repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); - assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); + assertThat(latest).hasSize(2); + TableOperationsHistoryRow forTarget = + latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); + assertThat(forTarget.getCompletedAt()).isEqualTo(t2); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index b1342b12d..2ca8dc61e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING.name(), null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -123,13 +123,13 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); - assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); + 
assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); + assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index f3e72b52e..475196630 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index b62371f53..240d512ef 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/settings.gradle b/settings.gradle index 0d64dad53..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,7 +50,6 @@ include 
':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' -include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 17e280ffc661380017170646fb572f24a639cb79 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:09:48 -0700 Subject: [PATCH 26/81] refactor(optimizer): drop apps/optimizer-data dep; simplify history API Follow-up to the optimizer-1 module move: - services/optimizer/build.gradle: drop the now-dead `implementation project(':apps:optimizer-data')` (target module removed in the prior merge). - Restore services-side TableStatsHistoryRepository (lost in the merge because optimizer-1 did not touch its services-side copy, but optimizer-2's HEAD had removed it during the R7-5 consolidation). - Drop the multi-filter `listHistory` service method, its controller endpoint, and the standalone TableByNameController. Callers use `getHistory(tableUuid, limit)` which now uses the simplified `findByTableUuidOrderByCompletedAtDesc` derived query. - TableStatsRow: enable `@Builder(toBuilder = true)` so `upsertTableStats` can build from the existing row. 
--- services/optimizer/build.gradle | 1 - .../api/controller/TableByNameController.java | 44 ------------------- .../TableOperationsHistoryController.java | 30 ------------- .../optimizer/entity/TableStatsRow.java | 2 +- .../TableStatsHistoryRepository.java | 29 ++++++++++++ .../service/OptimizerDataService.java | 16 ------- .../service/OptimizerDataServiceImpl.java | 28 +----------- 7 files changed, 31 insertions(+), 119 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index 31051b65c..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,7 +4,6 @@ plugins { } dependencies { - implementation project(':apps:optimizer-data') implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java deleted file mode 100644 index e3582ff7e..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.controller; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.service.OptimizerDataService; -import java.util.List; -import java.util.Optional; -import lombok.RequiredArgsConstructor; -import org.springframework.http.ResponseEntity; -import 
org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PathVariable; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; - -/** - * Name-keyed read endpoints for human/analyst convenience. UUID-keyed endpoints elsewhere remain - * the canonical path for machine callers, since drop-and-recreate of a table produces a new - * optimizer identity that a name-only lookup would conflate with the dropped table. - */ -@RestController -@RequestMapping("/v1/optimizer/databases/{databaseName}/tables/{tableName}") -@RequiredArgsConstructor -public class TableByNameController { - - private final OptimizerDataService service; - - /** Operation history for a table by (database, table) name, newest first. */ - @GetMapping("/operations-history") - public ResponseEntity> getOperationsHistoryByName( - @PathVariable String databaseName, - @PathVariable String tableName, - @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok( - service.listHistory( - Optional.of(databaseName), - Optional.of(tableName), - Optional.empty(), - Optional.empty(), - Optional.empty(), - Optional.empty(), - Optional.empty(), - limit)); - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index b14156d5b..17dc0670a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; 
-import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; -import java.time.Instant; import java.util.List; -import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -39,30 +35,4 @@ public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok(service.getHistory(tableUuid, limit)); } - - /** - * List history rows matching the given filters, ordered newest first. All parameters are optional - * — omit all to return every row up to {@code limit}. - */ - @GetMapping - public ResponseEntity> listHistory( - @RequestParam(required = false) String databaseName, - @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid, - @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) HistoryStatus status, - @RequestParam(required = false) Instant since, - @RequestParam(required = false) Instant until, - @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok( - service.listHistory( - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid), - Optional.ofNullable(operationType), - Optional.ofNullable(status), - Optional.ofNullable(since), - Optional.ofNullable(until), - limit)); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..b1fad275c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -25,7 +25,7 @@ @Table(name = "table_stats") @Getter @Setter -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableStatsRow { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..aaa1b0050 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 2909b8b5a..6f71c708e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,7 +1,6 @@ package com.linkedin.openhouse.optimizer.service; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -82,19 +81,4 @@ List listTableStats( * @param limit maximum number of rows to return */ List getHistory(String tableUuid, int limit); - - /** - * List history rows matching the given filters, ordered newest first. Every parameter is optional - * — pass {@link Optional#empty()} to skip that filter. No filters returns all rows up to {@code - * limit}. 
- */ - List listHistory( - Optional databaseName, - Optional tableName, - Optional tableUuid, - Optional operationType, - Optional status, - Optional since, - Optional until, - int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 0c9af5107..93b9af2a0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,7 +2,6 @@ import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -177,32 +176,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { @Override public List getHistory(String tableUuid, int limit) { return historyRepository - .find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, limit)).stream() - .map(mapper::toDto) - .collect(Collectors.toList()); - } - - @Override - public List listHistory( - Optional databaseName, - Optional tableName, - Optional tableUuid, - Optional operationType, - Optional status, - Optional since, - Optional until, - int limit) { - return historyRepository - .find( - databaseName.orElse(null), - tableName.orElse(null), - tableUuid.orElse(null), - operationType.map(OperationType::name).orElse(null), - status.map(HistoryStatus::name).orElse(null), - since.orElse(null), - until.orElse(null), - PageRequest.of(0, limit)) - .stream() + .findByTableUuidOrderByCompletedAtDesc(tableUuid, 
PageRequest.of(0, limit)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } From 9a129a8ca7848bf1db15dbf05f847bc7c593e8ef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:00:04 -0700 Subject: [PATCH 27/81] =?UTF-8?q?refactor(optimizer):=20align=20data=20mod?= =?UTF-8?q?el=20=E2=80=94=20rename=20HistoryStatus;=20String=20at=20JPA=20?= =?UTF-8?q?boundary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward fix in response to review feedback that data-model decisions belong in this PR (optimizer-0), not in downstream stack layers. Brings the data-model end-state to where optimizer-1+ already are, so the optimizer-0..optimizer-1 diff is just repositories + wiring. - Rename api/model/OperationHistoryStatus → HistoryStatus. - Move api/model/TableStats → model/TableStats (the in-memory stats domain type is used by both entities and DTOs, so it lives in a neutral package rather than under api/model/). - Delete config/JobResultConverter. Entities now store the JobResult as a JSON String column directly; serialization happens at the wire-API boundary via OptimizerMapper helpers. - Switch the operation/status columns on TableOperationsRow and TableOperationsHistoryRow from JPA-bound enums to String. Keeps the entity layer decoupled from wire-API enum identity. - Add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, and String↔JobResult default helpers to OptimizerMapper so MapStruct can bridge entity (String) and DTO (typed) columns. - Update DTO/entity imports to follow the renamed/moved types. 
--- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------------ .../entity/TableOperationsHistoryRow.java | 25 ++++---- .../optimizer/entity/TableOperationsRow.java | 15 +++-- .../entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 2 +- .../optimizer/{api => }/model/TableStats.java | 2 +- 13 files changed, 86 insertions(+), 68 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/{api => }/model/TableStats.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..36d4b5f4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import 
com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and + * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's + * typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import 
lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. */ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..8303a4579 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,15 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -27,6 +20,11 @@ * originating {@code table_operations.id}, tying each history entry back to the operation cycle * that produced it. Multiple runs of the same operation on the same table produce multiple rows * (each cycle gets a new UUID from the Analyzer). + * + *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} + * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and + * structured-result types. The wire layer is responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -60,25 +58,22 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + /** Job result JSON blob: error details on failure, both fields null on success. 
*/ + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..5d90f3d12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -26,6 +22,11 @@ * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, * operation_type)} at a time. + * + *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound + * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is + * responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -59,13 +60,11 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; /** When the Analyzer first created this row. Set by the service on insert; never updated. */ @Column(name = "created_at", nullable = false) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..2a1414567 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 64c99061a..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; From dfb910291443bcfe4b6adfb724808dadcb0c8c5a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:07:53 -0700 Subject: [PATCH 28/81] refactor(optimizer): realign entity shapes with optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R7-1 imported the looser apps-side variant of TableStatsRow, TableStatsHistoryRow, and model/TableStats into the services-side paths, regressing the locked-down shape that optimizer-0 had. R8-1's git mv carried the regression forward. 
This commit makes optimizer-1's HEAD match optimizer-0's canonical shape so the optimizer-0..optimizer-1 diff no longer shows ghost model edits. - TableStatsRow: restore @EqualsAndHashCode, AccessLevel.PROTECTED on NoArgsConstructor + AllArgsConstructor, and toBuilder=true on @Builder. Drop @Setter (no callers — repo tests and downstream consumers use the builder). - TableStatsHistoryRow: restore the dropped "can" in the javadoc. - model/TableStats: restore @JsonIgnoreProperties(ignoreUnknown = true) on the outer class + both inner classes, and restore the CommitDelta.addedSizeBytes field that R7-1 dropped. --- .../optimizer/entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 18 +++++++++++------- .../openhouse/optimizer/model/TableStats.java | 9 +++++++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 6f41881d6..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..2a1414567 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -8,26 +8,29 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; +import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { @Id @@ -48,6 +51,7 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 5e0f51468..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,15 +1,17 @@ package com.linkedin.openhouse.optimizer.model; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +/** Combined stats payload stored as a single JSON blob per table. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { /** Snapshot fields — overwritten on every upsert. 
*/ @@ -23,6 +25,7 @@ public class TableStats { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { private String clusterId; private String tableVersion; @@ -32,14 +35,16 @@ public static class SnapshotMetrics { private Long numCurrentFiles; } - /** Per-commit incremental counters accumulated across all recorded commit events. */ + /** Per-commit incremental counters; accumulated across all recorded commit events. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } From 681407ef6a1a1d2dc34dee2a4ca308c5d008ca3f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:18:39 -0700 Subject: [PATCH 29/81] feat(optimizer): add internal model layer Per-layer types: wire-API enums (api/model/), DB-side String at JPA boundary, and an internal in-memory model layer that is what the analyzer and scheduler operate on. The wire and DB sides convert at their boundary; consumers of the optimizer library work in the internal types. - model/HistoryStatus, model/OperationStatus, model/OperationType: internal enums mirroring the wire-API counterparts. Decoupled so the analyzer/scheduler can evolve their state machines without churning the wire or DB shapes. - model/Table: an OpenHouse table enriched with stats + properties. Built from a TableStatsRow. - model/TableOperation: analyzer's decision-to-schedule + scheduler's unit of work. Constructed from TableOperationsRow or from a Table; converts back via toRow(). 
--- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d49625a57 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationsRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From e3fb7770613e8635bb4f68bded2945e1845d7510 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:26:51 -0700 Subject: [PATCH 30/81] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed on table_operations_history. Drives the correlated MAX(completed_at) subquery in TableOperationsHistoryRepository.findLatestPerTable (introduced in optimizer-1), turning it into an index-only lookup per (operation_type, table_uuid) instead of an O(N²) scan. Lands with the schema in optimizer-0 since the index is part of the data model definition; the query that depends on it lands with the repository in optimizer-1. 
--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From d3e17262f5ec8b0e97b54d8312da746278680a6f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:11:11 -0700 Subject: [PATCH 31/81] refactor(optimizer): enforce layer boundaries in api/ + model/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make data types in api/ and model/ self-contained — no cross-layer imports between them and no references into the DB layer. The internal model layer owns conversion to the api edge via a new mapper sub-package. api/ changes: - Add api/model/TableStats (api-layer copy of the JSON payload). - Drop cross-layer imports from TableStatsDto, TableStatsHistoryDto, UpsertTableStatsRequest; they pick up TableStats from the same package. model/ changes: - Add model/JobResult (internal copy of the result payload). - Add model/TableOperationsHistory (internal container mirroring the history-row field set in typed form). 
- Remove cross-layer factory methods Table.from(TableStatsRow), TableOperation.from(TableOperationsRow), and TableOperation.toRow(). Construction at the DB boundary moves to a future model/mapper/ ModelDbMapper that ships with the db/ rename on optimizer-1. - Add model/mapper/ApiModelMapper — converts api/ DTOs ↔ model/ types. Only place inside model/ where api/ types appear. Per-PR ownership: - api/ and model/ live on this PR. - db/ (currently entity/) and its boundary-side mapper (model/mapper/ModelDbMapper) land on optimizer-1. - The existing api/mapper/OptimizerMapper still references entity/ on this branch; it gets retired on optimizer-2 once the service routes through the new mappers. --- .../optimizer/api/model/TableStats.java | 55 ++++ .../optimizer/api/model/TableStatsDto.java | 1 - .../api/model/TableStatsHistoryDto.java | 1 - .../api/model/UpsertTableStatsRequest.java | 1 - .../openhouse/optimizer/model/JobResult.java | 25 ++ .../openhouse/optimizer/model/Table.java | 16 +- .../optimizer/model/TableOperation.java | 38 +-- .../model/TableOperationsHistory.java | 47 ++++ .../model/mapper/ApiModelMapper.java | 234 ++++++++++++++++++ 9 files changed, 369 insertions(+), 49 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..de268ffe7 --- /dev/null +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Combined stats payload exposed on the optimizer wire API. + * + *

API-layer copy of the stats payload — self-contained, evolved only when the wire contract + * changes. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 4aad1e18f..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 6d515a543..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 8bb317676..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java new file mode 100644 index 000000000..7e48dd0ef --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model result payload for a completed Spark maintenance job. + * + *

Internal-layer copy of the structured result. Both fields are {@code null} on success; + * populated on failure. Intentionally separate from the wire-API and DB representations. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index e232803dd..c8bede225 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -12,6 +11,9 @@ * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). + * + *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link + * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data @Builder @@ -26,16 +28,4 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d49625a57..1f14dddff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,9 +10,11 @@ /** * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current - * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA - * row via {@link #toRow()}. + * up and submits. + * + *

<p>Pure internal-model type — no references to wire-API or DB types. Cross-layer construction + * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or + * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -55,20 +56,6 @@ public class TableOperation { */ private Long fileCount; - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationsRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() @@ -82,21 +69,6 @@ public static TableOperation pending(Table table, OperationType operationType) { .build(); } - /** Convert to a JPA entity for persistence. */ - public TableOperationsRow toRow() { - return TableOperationsRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - /** Return the more recently created of two operations. 
*/ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator byCreatedAt = diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java new file mode 100644 index 000000000..64e0d57b3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -0,0 +1,47 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of a completed operation history record. + * + *

Mirrors the field set of the underlying history row but in internal types only. Used by + * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistory { + + /** Same UUID as the originating live-operations row. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + /** Denormalized database name. */ + private String databaseName; + + /** Denormalized table name. */ + private String tableName; + + /** Operation type for this completed run. */ + private OperationType operationType; + + /** When the operation completed, as recorded by the complete endpoint. */ + private Instant completedAt; + + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ + private HistoryStatus status; + + /** Spark job ID for the run that produced this record. */ + private String jobId; + + /** Job result payload; both inner fields {@code null} on success. 
*/ + private JobResult result; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java new file mode 100644 index 000000000..2ae477e0d --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -0,0 +1,234 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.JobResult; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import org.springframework.stereotype.Component; + +/** + * Converts between wire-API DTOs and internal {@code model/} domain objects. + * + *

<p>The only place inside {@code model/} where {@code api/} types are referenced — this is the + * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} + * stay free of any api-side imports. + * + *

API-layer enums + payloads are intentionally separate Java types from the internal-model + * counterparts; the two sides evolve independently. This mapper translates by name. + */ +@Component +public class ApiModelMapper { + + // --- TableOperationsDto <-> TableOperation --- + + public TableOperation toOperation(TableOperationsDto dto) { + if (dto == null) { + return null; + } + return TableOperation.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .status(toModelOperationStatus(dto.getStatus())) + .createdAt(dto.getCreatedAt()) + .scheduledAt(dto.getScheduledAt()) + .build(); + } + + public TableOperationsDto toDto(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toApiOperationType(op.getOperationType())) + .status(toApiOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } + + // --- TableOperationsHistoryDto <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { + if (dto == null) { + return null; + } + return TableOperationsHistory.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .completedAt(dto.getCompletedAt()) + .status(toModelHistoryStatus(dto.getStatus())) + .jobId(dto.getJobId()) + .result(toModelJobResult(dto.getResult())) + .build(); + } + + public TableOperationsHistoryDto toDto(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + 
.databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(toApiOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toApiHistoryStatus(history.getStatus())) + .jobId(history.getJobId()) + .result(toApiJobResult(history.getResult())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { + if (apiStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(apiStats.getSnapshot())) + .delta(toModelDelta(apiStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() + .snapshot(toApiSnapshot(modelStats.getSnapshot())) + .delta(toApiDelta(modelStats.getDelta())) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType( + com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { + return apiValue == null ? null : OperationType.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( + OperationType modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { + return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( + OperationStatus modelValue) { + return modelValue == null + ? 
null + : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); + } + + public HistoryStatus toModelHistoryStatus( + com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { + return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( + HistoryStatus modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); + } + + // --- JobResult --- + + public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { + if (apiValue == null) { + return null; + } + return JobResult.builder() + .errorMessage(apiValue.getErrorMessage()) + .errorType(apiValue.getErrorType()) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() + .errorMessage(modelValue.getErrorMessage()) + .errorType(modelValue.getErrorType()) + .build(); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(apiValue.getClusterId()) + .tableVersion(apiValue.getTableVersion()) + .tableLocation(apiValue.getTableLocation()) + .tableSizeBytes(apiValue.getTableSizeBytes()) + .numCurrentFiles(apiValue.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( + TableStats.SnapshotMetrics modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() + .clusterId(modelValue.getClusterId()) + 
.tableVersion(modelValue.getTableVersion()) + .tableLocation(modelValue.getTableLocation()) + .tableSizeBytes(modelValue.getTableSizeBytes()) + .numCurrentFiles(modelValue.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(apiValue.getNumFilesAdded()) + .numFilesDeleted(apiValue.getNumFilesDeleted()) + .addedSizeBytes(apiValue.getAddedSizeBytes()) + .deletedSizeBytes(apiValue.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( + TableStats.CommitDelta modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() + .numFilesAdded(modelValue.getNumFilesAdded()) + .numFilesDeleted(modelValue.getNumFilesDeleted()) + .addedSizeBytes(modelValue.getAddedSizeBytes()) + .deletedSizeBytes(modelValue.getDeletedSizeBytes()) + .build(); + } +} From 1d469a72fdb68133c95cd8def12027f428ab2acd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:17:49 -0700 Subject: [PATCH 32/81] refactor(optimizer): remove db-layer types from optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer (entities + api↔db mapper) belongs to optimizer-1, not optimizer-0. optimizer-0 owns only the wire-API surface and the internal model. Delete from this PR: - entity/ package (TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow, package-info). - api/mapper/OptimizerMapper — was the api↔entity bridge. With the entity files moving out of this PR and the new model/mapper/ taking over conversion duties, this mapper is no longer needed here. 
optimizer-1 will re-introduce these as db/ (renamed) with db-side per-layer types and a model/mapper/ModelDbMapper. --- .../optimizer/api/mapper/OptimizerMapper.java | 92 ------------------- .../entity/TableOperationsHistoryRow.java | 79 ---------------- .../optimizer/entity/TableOperationsRow.java | 87 ------------------ .../entity/TableStatsHistoryRow.java | 61 ------------ .../optimizer/entity/TableStatsRow.java | 57 ------------ .../optimizer/entity/package-info.java | 2 - 6 files changed, 378 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java deleted file mode 100644 index 36d4b5f4b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.mapper; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import org.mapstruct.Mapper; - -/** - * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. - * - *

<p>Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. - * - *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and - * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's - * typed enums and nested objects. - */ -@Mapper(componentModel = "spring") -public interface OptimizerMapper { - - ObjectMapper JSON = new ObjectMapper(); - - /** Map a {@link TableOperationsRow} to its DTO. */ - TableOperationsDto toDto(TableOperationsRow row); - - /** Map a {@link TableOperationsHistoryRow} to its DTO. */ - TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); - - /** Map a {@link TableStatsRow} to its DTO. */ - TableStatsDto toDto(TableStatsRow row); - - /** Map a {@link TableStatsHistoryRow} to its DTO. */ - TableStatsHistoryDto toDto(TableStatsHistoryRow row); - - // --- entity String ↔ wire enum/object helpers --- - - default OperationType toOperationType(String value) { - return value == null ? null : OperationType.valueOf(value); - } - - default String fromOperationType(OperationType value) { - return value == null ? null : value.name(); - } - - default OperationStatus toOperationStatus(String value) { - return value == null ? null : OperationStatus.valueOf(value); - } - - default String fromOperationStatus(OperationStatus value) { - return value == null ? null : value.name(); - } - - default HistoryStatus toHistoryStatus(String value) { - return value == null ? null : HistoryStatus.valueOf(value); - } - - default String fromHistoryStatus(HistoryStatus value) { - return value == null ? 
null : value.name(); - } - - default JobResult toJobResult(String json) { - if (json == null) { - return null; - } - try { - return JSON.readValue(json, JobResult.class); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); - } - } - - default String fromJobResult(JobResult value) { - if (value == null) { - return null; - } - try { - return JSON.writeValueAsString(value); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 8303a4579..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

<p>Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - * - *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} - * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and - * structured-result types. The wire layer is responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. */ - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** Spark job ID; indexed for job → result lookups. 
*/ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result JSON blob: error details on failure, both fields null on success. */ - @Column(name = "result", columnDefinition = "TEXT") - private String result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 5d90f3d12..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

<p>Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - * - *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound - * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is - * responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. 
*/ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6ead5e42c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index 2a1414567..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java deleted file mode 100644 index 7c0ca1f67..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java +++ /dev/null @@ -1,2 +0,0 @@ -/** JPA entities for the optimizer service. */ -package com.linkedin.openhouse.optimizer.entity; From eee8ecae794fecdc7676e02c0fb286cd3c98e9fa Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:20:28 -0700 Subject: [PATCH 33/81] refactor(optimizer): remove DB schema + schema-init properties The DDL is part of the db/ layer's ownership (optimizer-1). Move the schema file and its schema-init properties out of optimizer-0 so this PR is purely api/ + model/. 
Delete: - src/main/resources/db/optimizer-schema.sql. - spring.sql.init.mode, spring.sql.init.schema-locations, and spring.jpa.defer-datasource-initialization from application.properties (they reference the deleted schema file). optimizer-1 re-introduces these alongside the db/ entities and repositories. --- .../src/main/resources/application.properties | 4 -- .../main/resources/db/optimizer-schema.sql | 56 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..00982d80e 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -2,10 +2,6 @@ spring.application.name=openhouse-optimizer-service server.port=8080 spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql - spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect spring.jpa.properties.hibernate.show_sql=false spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql deleted file mode 100644 index 322f3bf92..000000000 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Optimizer Service Schema --- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
-CREATE TABLE IF NOT EXISTS table_operations ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - status VARCHAR(20) NOT NULL, - created_at TIMESTAMP(6) NOT NULL, - scheduled_at TIMESTAMP(6), - job_id VARCHAR(255), - version BIGINT, - -- TODO: per-operation metric columns will be added as operations are onboarded. - PRIMARY KEY (id) -); - -CREATE TABLE IF NOT EXISTS table_stats ( - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - table_properties TEXT, - updated_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (table_uuid) -); - -CREATE TABLE IF NOT EXISTS table_stats_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - recorded_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (id), - INDEX idx_tsh_table_uuid (table_uuid), - INDEX idx_tsh_recorded_at (recorded_at) -); - -CREATE TABLE IF NOT EXISTS table_operations_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - completed_at TIMESTAMP(6) NOT NULL, - status VARCHAR(20) NOT NULL, - job_id VARCHAR(255), - result TEXT, - PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name), - -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated - -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, - -- table_uuid) instead of an O(N²) scan. 
- INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) -); From 328e5b91b4c1db0f5abf22e37a4dea787d351bef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:23:10 -0700 Subject: [PATCH 34/81] refactor(optimizer): scrub MySQL / JPA / datasource references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DB-layer dependencies belong to optimizer-1. With entities, schema, and the api/mapper deleted from this PR, the JPA + MySQL stack is unused — remove the dependency declarations and configuration that referenced them. build.gradle: - Drop spring-boot-starter-data-jpa, mysql-connector-java, the vladmihalcea hibernate-types JSON serializer, and the h2 test runtime. application.properties: - Drop spring.jpa.* and spring.datasource.* lines. Delete services/optimizer/src/test/resources/application-test.properties (H2 test datasource config — re-introduced on optimizer-1 alongside the repositories and repo tests). 
--- services/optimizer/build.gradle | 4 ---- .../src/main/resources/application.properties | 11 ----------- .../src/test/resources/application-test.properties | 12 ------------ 3 files changed, 27 deletions(-) delete mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..2de8fd5c7 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,11 +4,7 @@ plugins { } dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'mysql:mysql-connector-java:8.+' - testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 00982d80e..64c40d1f2 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -1,16 +1,5 @@ spring.application.name=openhouse-optimizer-service server.port=8080 -spring.jpa.hibernate.ddl-auto=none -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect -spring.jpa.properties.hibernate.show_sql=false -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver -spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} -spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} -spring.datasource.hikari.maximum-pool-size=20 - management.endpoints.web.exposure.include=health,prometheus 
management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties deleted file mode 100644 index 97b7841dc..000000000 --- a/services/optimizer/src/test/resources/application-test.properties +++ /dev/null @@ -1,12 +0,0 @@ -spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 -spring.datasource.driver-class-name=org.h2.Driver -spring.datasource.username=sa -spring.datasource.password= - -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From f7a5d208e106cb5c1c051bc450f14833be1bb093 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:25:41 -0700 Subject: [PATCH 35/81] refactor(optimizer): drop UpsertTableOperationsRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No external system creates table operations — operations are written by the in-process analyzer directly through the model layer. The request type has no wire consumer and no internal consumer, so it's dead code. Delete services/optimizer/.../api/model/UpsertTableOperationsRequest.java. 
--- .../model/UpsertTableOperationsRequest.java | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java deleted file mode 100644 index 21174c337..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * PUT request body for {@code /v1/table-operations/{id}}. - * - *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service creates the row on first call. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class UpsertTableOperationsRequest { - - /** Stable Iceberg table UUID identifying the target table. */ - private String tableUuid; - - /** Denormalized database name for display. */ - private String databaseName; - - /** Denormalized table name for display. */ - private String tableName; - - /** The type of maintenance operation to create. */ - private OperationType operationType; -} From 2a532b577ed51507c72e836ea4d8778967f43062 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:47:42 -0700 Subject: [PATCH 36/81] refactor(optimizer): drop JobResult from the wire and internal model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JobResult is removed from the optimizer API. CompleteOperationRequest (user-edited) now carries only operationId + status — the failure detail abstraction has been retired. The internal model and DTOs no longer carry it either, and the type itself is deleted from both api/ and model/. CompleteOperationRequest: - operationId moved from path to body (user manual edit). - jobId field removed. - result field removed. api/model/TableOperationsHistoryDto: - Drop jobId and result fields. model/TableOperationsHistory: - Drop jobId and result fields. model/mapper/ApiModelMapper: - Remove toModelJobResult / toApiJobResult helpers + JobResult import. - toHistory()/toDto() no longer touch jobId or result. Delete: - services/optimizer/.../api/model/JobResult.java - services/optimizer/.../model/JobResult.java Downstream propagation: opt-2's service signature changes (completeOperation now takes only the request body); db/HistoryStatus remains needed on opt-1 but db/JobResult no longer is. 
See memory/tasks/mkuchenb-optimizer-3-fixes.md for the full propagation list. --- .../api/model/CompleteOperationRequest.java | 15 ++++++----- .../optimizer/api/model/JobResult.java | 25 ----------------- .../api/model/TableOperationsHistoryDto.java | 6 ----- .../openhouse/optimizer/model/JobResult.java | 25 ----------------- .../model/TableOperationsHistory.java | 6 ----- .../model/mapper/ApiModelMapper.java | 27 ------------------- 6 files changed, 9 insertions(+), 95 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 4f3f6535a..30648d497 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -8,8 +8,12 @@ /** * Request body for {@code POST /v1/table-operations/{id}/complete}. * - *

<p>Reports the outcome of a completed operation. The backend looks up the operation row by {@code - * id} and writes a history entry with the operation's table metadata and the supplied result. + *

<p>Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle + * operation UUID — the service looks up that one row and writes a history entry for it. + * + *

A single Spark job typically processes N tables and yields N independent (status, result) + * pairs — one per operation. Callers issue one complete request per operation; the service does not + * bulk-complete by job. */ @Data @Builder @@ -17,9 +21,8 @@ @AllArgsConstructor public class CompleteOperationRequest { - /** Outcome of the operation. */ - private HistoryStatus status; + private String operationId; - /** Error details on failure; {@code null} on success. */ - private JobResult result; + /** Terminal outcome for this single operation. */ + private HistoryStatus status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java deleted file mode 100644 index 74942243c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Result payload for a completed Spark maintenance job. - * - *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields - * are {@code null} on success; populated on failure. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index a7a9d9dc6..d9fa1f387 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -28,10 +28,4 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; - - /** Job ID from the Jobs Service. */ - private String jobId; - - /** Job result payload; both fields null on success. */ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java deleted file mode 100644 index 7e48dd0ef..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Internal-model result payload for a completed Spark maintenance job. - * - *

Internal-layer copy of the structured result. Both fields are {@code null} on success; - * populated on failure. Intentionally separate from the wire-API and DB representations. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 64e0d57b3..fe5bee5f7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -38,10 +38,4 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; - - /** Spark job ID for the run that produced this record. */ - private String jobId; - - /** Job result payload; both inner fields {@code null} on success. 
*/ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 2ae477e0d..35af7fb25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.JobResult; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; @@ -72,8 +71,6 @@ public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { .operationType(toModelOperationType(dto.getOperationType())) .completedAt(dto.getCompletedAt()) .status(toModelHistoryStatus(dto.getStatus())) - .jobId(dto.getJobId()) - .result(toModelJobResult(dto.getResult())) .build(); } @@ -89,8 +86,6 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .operationType(toApiOperationType(history.getOperationType())) .completedAt(history.getCompletedAt()) .status(toApiHistoryStatus(history.getStatus())) - .jobId(history.getJobId()) - .result(toApiJobResult(history.getResult())) .build(); } @@ -154,28 +149,6 @@ public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStat : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); } - // --- JobResult --- - - public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { - if (apiValue == null) { - return null; - } - return JobResult.builder() - 
.errorMessage(apiValue.getErrorMessage()) - .errorType(apiValue.getErrorType()) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() - .errorMessage(modelValue.getErrorMessage()) - .errorType(modelValue.getErrorType()) - .build(); - } - // --- TableStats inner classes --- private TableStats.SnapshotMetrics toModelSnapshot( From 2e3a2316295d67105802f4a4c73032396048be9d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:52:15 -0700 Subject: [PATCH 37/81] feat(optimizer): add debug echo fields to CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tableUuid, databaseName, tableName, and operationType to the complete request body. They're debug-only — the server keys lookup off operationId — but preserving them on logs and traces helps an operator diagnose a failing complete call without joining back to the operation row. --- .../api/model/CompleteOperationRequest.java | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 30648d497..0add634b5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -6,14 +6,20 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/{id}/complete}. + * Request body for {@code POST /v1/table-operations/complete}. * - *

<p>Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle - * operation UUID — the service looks up that one row and writes a history entry for it. + *

<p>Reports the outcome of a single completed operation. The service looks up the operation row by + * {@link #operationId} and writes a history entry for it. * - *

<p>A single Spark job typically processes N tables and yields N independent (status, result) - * pairs — one per operation. Callers issue one complete request per operation; the service does not + *

<p>A single Spark job typically processes N tables and yields N independent (status) outcomes — + * one per operation. Callers issue one complete request per operation; the service does not * bulk-complete by job. + * + *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link + * #operationType}) are debug-only echo information. The server does not key off them; they are + * preserved on log lines and traces so an operator looking at a failing complete call can see which + * (db, table, operation) the caller believed it was completing without joining back to the + * operation row. */ @Data @Builder @@ -21,8 +27,21 @@ @AllArgsConstructor public class CompleteOperationRequest { + /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ private HistoryStatus status; + + /** Debug echo: stable table identity the caller believed it was completing. */ + private String tableUuid; + + /** Debug echo: database name. */ + private String databaseName; + + /** Debug echo: table name. */ + private String tableName; + + /** Debug echo: operation type. */ + private OperationType operationType; } From db5eb2959a0fbbfba5d821ee36f00435248f9f5c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:53:16 -0700 Subject: [PATCH 38/81] refactor(optimizer): move application.properties out of optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every line in application.properties is run-time config (server.port, spring.application.name, actuator endpoints). optimizer-0 has no controllers and no endpoint to serve — the file is doing nothing here. The first PR that actually runs a web service is optimizer-2. Delete the file from this PR. optimizer-2 will re-introduce it alongside the REST controllers. The OptimizerServiceApplication @SpringBootApplication shell stays on this branch — optimizer-1's repository tests use @SpringBootTest and need an application class to discover. 
--- services/optimizer/src/main/resources/application.properties | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties deleted file mode 100644 index 64c40d1f2..000000000 --- a/services/optimizer/src/main/resources/application.properties +++ /dev/null @@ -1,5 +0,0 @@ -spring.application.name=openhouse-optimizer-service -server.port=8080 - -management.endpoints.web.exposure.include=health,prometheus -management.endpoint.health.enabled=true From ac3abc06fec4b3cb1433649e16536a3e0008a4a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:04:51 -0700 Subject: [PATCH 39/81] feat(optimizer): introduce db/ layer with per-layer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit optimizer-0 retired entity/, the schema, JPA/MySQL deps, and the api/mapper. This PR brings the DB layer back as db/ with its own self-contained types and a model↔db boundary mapper. db/ package: - TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow — JPA entities (same field set as the pre-deletion entity/ versions, with two exceptions: enum fields on the operations rows are now typed db/-side enums via @Enumerated(STRING), and TableOperationsHistoryRow loses the jobId/result columns since they were removed from the wire on optimizer-0). - OperationType, OperationStatus, HistoryStatus — db-layer enums. - TableStats (+ inner SnapshotMetrics, CommitDelta) — db-layer JSON payload, mirrors the model/ + api/ counterparts in shape but is its own class. model/mapper/ModelDbMapper: - Translates between model/ domain objects and db/ rows. - Lives in model/ per the boundary rule (model/ owns conversions to both edges; api/, model/, db/ data types are self-contained). 
Repositories: imports switched to db/; find() and findLatestPerTable take typed db enums instead of String. Repository tests: builders pass typed db enums; remove jobId/result fields no longer on TableOperationsHistoryRow. Schema (db/optimizer-schema.sql): restored. table_operations_history no longer has job_id / result columns. The idx_toph_optype_uuid_completed index for findLatestPerTable is preserved. build.gradle: restore spring-boot-starter-data-jpa, hibernate-types, mysql-connector-java, h2 dependencies. application-test.properties: restored (H2 test datasource). --- services/optimizer/build.gradle | 4 + .../openhouse/optimizer/db/HistoryStatus.java | 11 + .../optimizer/db/OperationStatus.java | 13 + .../openhouse/optimizer/db/OperationType.java | 12 + .../db/TableOperationsHistoryRow.java | 69 +++++ .../optimizer/db/TableOperationsRow.java | 87 +++++++ .../openhouse/optimizer/db/TableStats.java | 55 ++++ .../optimizer/db/TableStatsHistoryRow.java | 63 +++++ .../openhouse/optimizer/db/TableStatsRow.java | 59 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 235 ++++++++++++++++++ .../TableOperationsHistoryRepository.java | 6 +- .../repository/TableOperationsRepository.java | 8 +- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 2 +- .../main/resources/db/optimizer-schema.sql | 54 ++++ .../TableOperationsHistoryRepositoryTest.java | 47 ++-- .../TableOperationsRepositoryTest.java | 44 ++-- .../TableStatsHistoryRepositoryTest.java | 4 +- .../repository/TableStatsRepositoryTest.java | 4 +- .../resources/application-test.properties | 12 + 20 files changed, 734 insertions(+), 57 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java create mode 
100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index 2de8fd5c7..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,7 +4,11 @@ plugins { } dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java new file mode 100644 index 000000000..94e573968 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -0,0 +1,11 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations_history}. + * + *

Self-contained: no references to api/ or model/ types. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java new file mode 100644 index 000000000..4e9161693 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations}. + * + *

Self-contained: no references to api/ or model/ types. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java new file mode 100644 index 000000000..3a896e415 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the operation types persisted in {@code table_operations.operation_type} and + * {@code table_operations_history.operation_type}. + * + *

Self-contained: no references to api/ or model/ types. JPA binds this via + * {@code @Enumerated(EnumType.STRING)}. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java new file mode 100644 index 000000000..2e1230181 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -0,0 +1,69 @@ +package com.linkedin.openhouse.optimizer.db; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed maintenance operation. + * + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating live-operations row, tying each history entry back to the operation cycle that + * produced it. Multiple runs of the same operation on the same table produce multiple rows. + * + *

Self-contained DB-layer type: enums are {@link OperationType} / {@link HistoryStatus} from the + * same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_completed_at", columnList = "completed_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating live-operations row. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Column(name = "completed_at", nullable = false) + private Instant completedAt; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private HistoryStatus status; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java new file mode 100644 index 000000000..9652214d3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -0,0 +1,87 @@ +package com.linkedin.openhouse.optimizer.db; + +import 
java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state. {@code table_uuid} is the stable identity for the table (survives renames; rotates on + * drop+recreate). The application enforces one active (PENDING / SCHEDULING / SCHEDULED) row per + * {@code (table_uuid, operation_type)} at a time. + * + *

Self-contained DB-layer type: enums are {@link OperationType} / {@link OperationStatus} from + * the same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, + * ensuring two scheduler instances can't both move the same row out of PENDING. 
Not managed by + * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works + * portably across MySQL and H2. + */ + @Column(name = "version") + private Long version; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java new file mode 100644 index 000000000..ceebb5ad5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} + * and {@code table_stats_history}. + * + *

Self-contained: no references to api/ or model/ types. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java new file mode 100644 index 000000000..2b7628de1 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -0,0 +1,63 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import 
lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + * + *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java new file mode 100644 index 000000000..950cf5327 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -0,0 +1,59 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; 
+import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + * + *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java new file mode 100644 index 000000000..f77773928 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -0,0 +1,235 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import 
com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import org.springframework.stereotype.Component; + +/** + * Converts between internal {@code model/} domain objects and database row entities. + * + *

The only place inside {@code model/} where {@code db/} types are referenced — this is the + * boundary at which the internal model meets the database layer. Pure data types under {@code + * model/} stay free of any DB-side imports. + * + *

Each layer carries its own per-layer enum + payload types. This mapper translates between + * model/-side and db/-side counterparts by name. + */ +@Component +public class ModelDbMapper { + + // --- TableOperationsRow <-> TableOperation --- + + public TableOperation toOperation(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .status(toModelOperationStatus(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + public TableOperationsRow toRow(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsRow.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toDbOperationType(op.getOperationType())) + .status(toDbOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .version(0L) + .build(); + } + + // --- TableOperationsHistoryRow <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(toModelHistoryStatus(row.getStatus())) + .build(); + } + + public TableOperationsHistoryRow toRow(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryRow.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + 
.operationType(toDbOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toDbHistoryStatus(history.getStatus())) + .build(); + } + + // --- TableStatsRow -> Table --- + + public Table toTable(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(toModelStats(row.getStats())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { + if (dbStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(dbStats.getSnapshot())) + .delta(toModelDelta(dbStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.builder() + .snapshot(toDbSnapshot(modelStats.getSnapshot())) + .delta(toDbDelta(modelStats.getDelta())) + .build(); + } + + public TableStatsHistoryRow toStatsHistoryRow( + String id, + String tableUuid, + String databaseName, + String tableName, + TableStats stats, + java.time.Instant recordedAt) { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(toDbStats(stats)) + .recordedAt(recordedAt) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { + return v == null ? 
null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( + OperationStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); + } + + public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( + TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + 
.numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( + TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index ba2ce35a8..5faf349e3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,7 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; @@ -36,5 +37,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable( + @Param("operationType") OperationType operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index c7a08cabc..e9bc1c8b3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -21,8 +23,8 @@ public interface TableOperationsRepository extends JpaRepository find( - @Param("operationType") String operationType, - @Param("status") String status, + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index aaa1b0050..6f9595275 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; import 
org.springframework.data.domain.Pageable; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 4215237bc..dbf1de0ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..92e79976b --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,54 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). +CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
+ PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. 
+ INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) +); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 436d08066..706ecd877 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.HistoryStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import java.util.UUID; @@ -27,38 +27,37 @@ void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); + String idOlder = UUID.randomUUID().toString(); + String idNewer = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idOlder) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) - .jobId("job-001") + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idNewer) 
.tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) - .jobId("job-002") - .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") + .status(HistoryStatus.FAILED) .build()); List rows = repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); - assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + assertThat(rows.get(0).getId()).isEqualTo(idNewer); + assertThat(rows.get(1).getId()).isEqualTo(idOlder); } @Test @@ -72,9 +71,9 @@ void findByTableUuid_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(now.plusSeconds(i)) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); } @@ -96,9 +95,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -106,9 +105,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) + .status(HistoryStatus.FAILED) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -116,18 +115,18 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(otherUuid) 
.databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); assertThat(forTarget.getCompletedAt()).isEqualTo(t2); - assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 2ca8dc61e..44a03ba9e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); List pending = - 
repository.find(null, OperationStatus.PENDING.name(), null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -123,8 +123,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 475196630..18241ce8d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static 
org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 240d512ef..e70704f51 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect 
+spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From e79eec7b01dd0890df975c3e3ac311f2ef2cc96c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:25:33 -0700 Subject: [PATCH 40/81] refactor(optimizer): split TableStats envelope into snapshot + delta columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer no longer mirrors the wire-side TableStats JSON envelope. Instead the two structurally-separate concepts inside it — point-in-time snapshot metrics and per-commit delta counters — are persisted as two independent JSON columns. Per-layer decoupling: the api/ envelope can evolve without forcing the DB column shape to change in lockstep. Tables and class names are unchanged: table_stats / table_stats_history on the SQL side; TableStatsRow / TableStatsHistoryRow on the Java side. Changes: - Delete db/TableStats (the envelope wrapper is no longer needed). - Add db/SnapshotMetrics (plain POJO; serialized into the `snapshot` JSON column). - Add db/CommitDeltaMetrics (plain POJO; serialized into the `delta` JSON column). - TableStatsRow: replace `stats: TableStats` with `snapshot: SnapshotMetrics` and `delta: CommitDeltaMetrics`. - TableStatsHistoryRow: same split. - Schema: replace `stats TEXT` with `snapshot TEXT` and `delta TEXT` on both tables. - ModelDbMapper: split/join at the boundary. New helpers `toDbSnapshot`, `toDbDelta`, `joinStats` translate between the single model-layer TableStats and the two DB columns. `toStatsHistoryRow` projects a TableStats into the two-column row. - Repository tests: build rows with the new two-field shape. 
--- .../optimizer/db/CommitDeltaMetrics.java | 21 +++++++ .../optimizer/db/SnapshotMetrics.java | 24 ++++++++ .../openhouse/optimizer/db/TableStats.java | 55 ------------------ .../optimizer/db/TableStatsHistoryRow.java | 18 +++--- .../openhouse/optimizer/db/TableStatsRow.java | 13 +++-- .../optimizer/model/mapper/ModelDbMapper.java | 57 ++++++++++--------- .../main/resources/db/optimizer-schema.sql | 6 +- .../TableStatsHistoryRepositoryTest.java | 26 ++++----- .../repository/TableStatsRepositoryTest.java | 51 ++++++----------- 9 files changed, 125 insertions(+), 146 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java new file mode 100644 index 000000000..8094d28b8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Per-commit incremental counters. Serialized as JSON into the {@code delta} column. 
*/ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommitDeltaMetrics { + + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java new file mode 100644 index 000000000..22d222172 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Point-in-time snapshot fields. Serialized as JSON into the {@code snapshot} column. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class SnapshotMetrics { + + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + + /** Total number of data files as of the latest snapshot — used for bin-packing. 
*/ + private Long numCurrentFiles; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java deleted file mode 100644 index ceebb5ad5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.linkedin.openhouse.optimizer.db; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} - * and {@code table_stats_history}. - * - *

<p>Self-contained: no references to api/ or model/ types. - */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 2b7628de1..71c17b582 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -19,12 +19,12 @@ /** * Append-only record of per-commit stats reported by the Tables Service. * - *

<p>Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. + *

<p>Each Iceberg commit produces one row. Consumers can query this table to reconstruct change + * rates over arbitrary time windows. * - *

<p>Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

<p>Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields at commit time) and {@link CommitDeltaMetrics} (per-commit + * counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -55,8 +55,12 @@ public class TableStatsHistoryRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Column(name = "recorded_at", nullable = false) private Instant recordedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 950cf5327..2566763ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,8 +22,9 @@ *

<p>Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

<p>Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

<p>Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} + * (per-commit counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -46,8 +47,12 @@ public class TableStatsRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index f77773928..0ae9167e1 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.model.mapper; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; @@ -21,8 +23,9 @@ * boundary at which the internal model meets the database layer. Pure data types under {@code * model/} stay free of any DB-side imports. * - *

<p>Each layer carries its own per-layer enum + payload types. This mapper translates between - * model/-side and db/-side counterparts by name. + *

<p>Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side + * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this + * mapper joins / splits them at the boundary. */ @Component public class ModelDbMapper { @@ -106,30 +109,31 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(toModelStats(row.getStats())) + .stats(joinStats(row.getSnapshot(), row.getDelta())) .build(); } - // --- TableStats payload --- + // --- TableStats payload <-> (snapshot, delta) --- - public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { - if (dbStats == null) { + /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ + public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { return null; } return TableStats.builder() - .snapshot(toModelSnapshot(dbStats.getSnapshot())) - .delta(toModelDelta(dbStats.getDelta())) + .snapshot(toModelSnapshot(dbSnapshot)) + .delta(toModelDelta(dbDelta)) .build(); } - public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.db.TableStats.builder() - .snapshot(toDbSnapshot(modelStats.getSnapshot())) - .delta(toDbDelta(modelStats.getDelta())) - .build(); + /** Project the internal-model {@link TableStats#getSnapshot()} side. */ + public SnapshotMetrics toDbSnapshot(TableStats modelStats) { + return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); + } + + /** Project the internal-model {@link TableStats#getDelta()} side. */ + public CommitDeltaMetrics toDbDelta(TableStats modelStats) { + return modelStats == null ? 
null : toDbDelta(modelStats.getDelta()); } public TableStatsHistoryRow toStatsHistoryRow( @@ -144,7 +148,8 @@ public TableStatsHistoryRow toStatsHistoryRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats(toDbStats(stats)) + .snapshot(toDbSnapshot(stats)) + .delta(toDbDelta(stats)) .recordedAt(recordedAt) .build(); } @@ -177,10 +182,9 @@ public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(Histo return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); } - // --- TableStats inner classes --- + // --- inner-payload field copies --- - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { if (v == null) { return null; } @@ -193,12 +197,11 @@ private TableStats.SnapshotMetrics toModelSnapshot( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( - TableStats.SnapshotMetrics v) { + private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { if (v == null) { return null; } - return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + return SnapshotMetrics.builder() .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) @@ -207,8 +210,7 @@ private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnaps .build(); } - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { if (v == null) { return null; } @@ -220,12 +222,11 @@ private TableStats.CommitDelta toModelDelta( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( - TableStats.CommitDelta v) { + private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { if (v == null) { return 
null; } - return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + return CommitDeltaMetrics.builder() .numFilesAdded(v.getNumFilesAdded()) .numFilesDeleted(v.getNumFilesDeleted()) .addedSizeBytes(v.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 92e79976b..3f3d11629 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -19,7 +19,8 @@ CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) @@ -30,7 +31,8 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), INDEX idx_tsh_table_uuid (table_uuid), diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 18241ce8d..dbd8cc686 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import 
java.time.Instant; import java.time.temporal.ChronoUnit; @@ -35,8 +36,8 @@ void saveAndFind() { assertThat(rows).hasSize(3); // newest first - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); - assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getDelta().getNumFilesAdded()).isEqualTo(10L); } @Test @@ -67,7 +68,7 @@ void find_withSince_filtersOlderRows() { // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); } @Test @@ -131,18 +132,11 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats( - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder() - .clusterId("cl1") - .tableSizeBytes(1024L) - .build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(numFilesAdded) - .numFilesDeleted(numFilesDeleted) - .build()) + .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta( + CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) .build()) .recordedAt(recordedAt) .build(); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index e70704f51..5f6a4ef4f 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import 
com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; @@ -24,19 +25,18 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - TableStats stats = - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) - .build(); + SnapshotMetrics snapshot = + SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + CommitDeltaMetrics delta = + CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats(stats) + .snapshot(snapshot) + .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -44,7 +44,8 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); - assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } @@ -58,10 +59,7 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) 
.updatedAt(Instant.now()) .build()); @@ -70,15 +68,12 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); assertThat(repository.findAll()).hasSize(1); - assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + assertThat(repository.findById(tableUuid).get().getSnapshot().getTableSizeBytes()) .isEqualTo(200L); } @@ -89,10 +84,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -100,10 +92,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); @@ -117,10 +106,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -128,10 +114,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - 
.build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); From f955ded61892180eefdc562ecc48a0b5cbffa391 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:34:25 -0700 Subject: [PATCH 41/81] fix(optimizer): drop CommitDeltaMetrics from TableStatsRow table_stats is the current-state row (one per table). Per-commit deltas are an append-only history concern and belong only to TableStatsHistoryRow. Storing a delta on the current-state row implied an aggregation that isn't actually performed. - TableStatsRow: remove the `delta` field. - table_stats schema: drop the `delta` column. - ModelDbMapper.toTable: project only snapshot to model.TableStats; history-only deltas remain in TableStatsHistoryRow. - TableStatsRepositoryTest: drop .delta(...) builder usage. --- .../linkedin/openhouse/optimizer/db/TableStatsRow.java | 9 ++------- .../openhouse/optimizer/model/mapper/ModelDbMapper.java | 3 ++- .../optimizer/src/main/resources/db/optimizer-schema.sql | 1 - .../optimizer/repository/TableStatsRepositoryTest.java | 5 ----- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 2566763ce..8d869ff1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,9 +22,8 @@ *

<p>Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

<p>Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link - * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} - * (per-commit counters). + *

<p>Self-contained DB-layer type. Holds only the point-in-time {@link SnapshotMetrics} — + * per-commit deltas live exclusively on {@link TableStatsHistoryRow} and are not aggregated here. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -50,10 +49,6 @@ public class TableStatsRow { @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; - @Type(type = "json") - @Column(name = "delta", columnDefinition = "TEXT") - private CommitDeltaMetrics delta; - @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 0ae9167e1..755b38400 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -109,7 +109,8 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) + // table_stats holds only the snapshot — deltas live on the history table. 
+ .stats(joinStats(row.getSnapshot(), null)) .build(); } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 3f3d11629..24b367549 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -20,7 +20,6 @@ CREATE TABLE IF NOT EXISTS table_stats ( database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, snapshot TEXT, - delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5f6a4ef4f..493eb88b6 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; @@ -27,8 +26,6 @@ void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); SnapshotMetrics snapshot = SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); - CommitDeltaMetrics delta = - CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() @@ -36,7 +33,6 @@ void saveAndFindById() { .databaseName("db1") .tableName("tbl1") .snapshot(snapshot) - .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -45,7 +41,6 @@ void saveAndFindById() { 
assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); - assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } From 969949d98b935443017e1264aa69216c63429001 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:42:21 -0700 Subject: [PATCH 42/81] refactor(optimizer): rewire service layer onto api/model/db mappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt the REST service layer to the new architecture introduced on optimizer-0 and optimizer-1: - api/ and db/ data types are self-contained per layer. - model/mapper/ApiModelMapper and model/mapper/ModelDbMapper own all cross-layer translation. - The old api/mapper/OptimizerMapper is gone. - JobResult is removed from the wire entirely. - TableStats is split on the DB side: TableStatsRow holds only the snapshot; TableStatsHistoryRow holds snapshot + delta per commit. Changes: OptimizerDataServiceImpl rewrite: - Inject ApiModelMapper + ModelDbMapper instead of OptimizerMapper. - Operations: list/get/complete/append go db row → ModelDbMapper → model object → ApiModelMapper → wire DTO. Enum filters on list() translate api → model → db. - completeOperation: signature is now (CompleteOperationRequest) only; operationId lives in the body. No jobId / result on the written history row. - Stats: split api.TableStats into snapshot (current-state row) and snapshot+delta (history row) at write time. Join back to the wire TableStats at read time (current-state has snapshot only; history has both). OptimizerDataService interface: - completeOperation(CompleteOperationRequest) — drop the String id path-style parameter. TableOperationsController: - POST endpoint moves from /{id}/complete to /complete. 
operationId is read from the request body. application.properties: - Re-introduced with production runtime config (server.port, application name, actuator) and JPA/MySQL datasource + schema-init pointing at the schema added on optimizer-1. OptimizerDataServiceImplTest: rewritten to use api/ + db/ types, new completeOperation signature, and the split snapshot/delta on stats; drop JobResult-dependent assertions. --- .../controller/TableOperationsController.java | 13 ++- .../service/OptimizerDataService.java | 9 +- .../service/OptimizerDataServiceImpl.java | 106 +++++++++++++----- .../src/main/resources/application.properties | 20 ++++ .../service/OptimizerDataServiceImplTest.java | 73 +++++++++--- 5 files changed, 162 insertions(+), 59 deletions(-) create mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index adc4d7a85..e48043a35 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -28,15 +28,16 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report that an operation has completed. The backend looks up the operation row, writes a - * history entry with the operation's table metadata and the supplied result. Returns 201 Created - * with the history row, or 404 if the operation does not exist. + * Report that an operation has completed. The body carries the {@code operationId} the caller is + * completing along with its terminal status. 
The backend looks up the operation row, writes a + * history entry with the operation's table metadata, and returns 201 Created with the history + * row, or 404 if the operation does not exist. */ - @PostMapping("/{id}/complete") + @PostMapping("/complete") public ResponseEntity completeOperation( - @PathVariable String id, @RequestBody CompleteOperationRequest request) { + @RequestBody CompleteOperationRequest request) { return service - .completeOperation(id, request) + .completeOperation(request) .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto)) .orElse(ResponseEntity.notFound().build()); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 6f71c708e..c3988f668 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -29,12 +29,11 @@ List listTableOperations( Optional tableUuid); /** - * Complete an operation by writing a history entry. Looks up the operation row by {@code id}, - * copies its table metadata into a new history row, and saves it. Returns the history DTO, or - * empty if the operation does not exist. + * Complete an operation by writing a history entry. Looks up the operation row by {@code + * request.operationId}, copies its table metadata into a new history row, and saves it. Returns + * the history DTO, or empty if the operation does not exist. */ - Optional completeOperation( - String id, CompleteOperationRequest request); + Optional completeOperation(CompleteOperationRequest request); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. 
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 93b9af2a0..21802a84f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,17 +1,19 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import 
com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -35,7 +37,8 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final OptimizerMapper mapper; + private final ApiModelMapper apiMapper; + private final ModelDbMapper dbMapper; // --- TableOperations --- @@ -48,22 +51,26 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.map(OperationType::name).orElse(null), - status.map(OperationStatus::name).orElse(null), + operationType + .map(t -> dbMapper.toDbOperationType(apiMapper.toModelOperationType(t))) + .orElse(null), + status + .map(s -> dbMapper.toDbOperationStatus(apiMapper.toModelOperationStatus(s))) + .orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(mapper::toDto) + .map(dbMapper::toOperation) + .map(apiMapper::toDto) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation( - String id, CompleteOperationRequest request) { + public Optional completeOperation(CompleteOperationRequest request) { return operationsRepository - .findById(id) + .findById(request.getOperationId()) .map( row -> { TableOperationsHistoryRow historyRow = @@ -74,17 +81,17 @@ public Optional completeOperation( .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status(request.getStatus().name()) - .jobId(row.getJobId()) - .result(mapper.fromJobResult(request.getResult())) + .status( + dbMapper.toDbHistoryStatus( + apiMapper.toModelHistoryStatus(request.getStatus()))) .build(); - return mapper.toDto(historyRepository.save(historyRow)); + return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(historyRow))); }); } @Override public Optional 
getTableOperation(String id) { - return operationsRepository.findById(id).map(mapper::toDto); + return operationsRepository.findById(id).map(dbMapper::toOperation).map(apiMapper::toDto); } // --- TableStats --- @@ -93,6 +100,9 @@ public Optional getTableOperation(String id) { @Transactional public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) { Instant now = Instant.now(); + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + apiMapper.toModelStats(request.getStats()); + TableStatsRow row = statsRepository .findById(tableUuid) @@ -102,7 +112,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .toBuilder() .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) .tableProperties(request.getTableProperties()) .updatedAt(now) .build()) @@ -111,11 +121,11 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .tableUuid(tableUuid) .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) .tableProperties(request.getTableProperties()) .updatedAt(now) .build()); - TableStatsDto saved = mapper.toDto(statsRepository.save(row)); + TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( TableStatsHistoryRow.builder() @@ -123,16 +133,17 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .tableUuid(tableUuid) .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) + .delta(dbMapper.toDbDelta(modelStats)) .recordedAt(now) .build()); - return saved; + return toTableStatsDto(saved); } @Override public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(mapper::toDto); + return 
statsRepository.findById(tableUuid).map(this::toTableStatsDto); } @Override @@ -140,7 +151,7 @@ public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(mapper::toDto) + .map(this::toTableStatsDto) .collect(Collectors.toList()); } @@ -149,7 +160,7 @@ public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(mapper::toDto) + .map(this::toTableStatsHistoryDto) .collect(Collectors.toList()); } @@ -164,20 +175,55 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .tableUuid(dto.getTableUuid()) .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) - .operationType(dto.getOperationType() != null ? dto.getOperationType().name() : null) + .operationType( + dbMapper.toDbOperationType(apiMapper.toModelOperationType(dto.getOperationType()))) .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dto.getStatus() != null ? dto.getStatus().name() : null) - .jobId(dto.getJobId()) - .result(mapper.fromJobResult(dto.getResult())) + .status(dbMapper.toDbHistoryStatus(apiMapper.toModelHistoryStatus(dto.getStatus()))) .build(); - return mapper.toDto(historyRepository.save(row)); + return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(row))); } @Override public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(mapper::toDto) + .map(dbMapper::toHistory) + .map(apiMapper::toDto) .collect(Collectors.toList()); } + + // --- private helpers --- + + /** + * Assemble a wire {@link TableStatsDto} from a {@link TableStatsRow}. 
The current-state row holds + * only the snapshot — deltas live exclusively on history rows. + */ + private TableStatsDto toTableStatsDto(TableStatsRow row) { + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + dbMapper.joinStats(row.getSnapshot(), null); + TableStats apiStats = apiMapper.toApiStats(modelStats); + return TableStatsDto.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(apiStats) + .tableProperties(row.getTableProperties()) + .updatedAt(row.getUpdatedAt()) + .build(); + } + + /** Assemble a wire {@link TableStatsHistoryDto} from a {@link TableStatsHistoryRow}. */ + private TableStatsHistoryDto toTableStatsHistoryDto(TableStatsHistoryRow row) { + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + dbMapper.joinStats(row.getSnapshot(), row.getDelta()); + TableStats apiStats = apiMapper.toApiStats(modelStats); + return TableStatsHistoryDto.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(apiStats) + .recordedAt(row.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + 
+spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 17ab55278..29374cbfc 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -4,15 +4,14 @@ import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import 
com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -42,16 +41,16 @@ class OptimizerDataServiceImplTest { @Test void completeOperation_writesHistoryFromOperationRow() { - String id = UUID.randomUUID().toString(); + String operationId = UUID.randomUUID().toString(); String tableUuid = UUID.randomUUID().toString(); operationsRepository.save( TableOperationsRow.builder() - .id(id) + .id(operationId) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) .createdAt(Instant.now()) .scheduledAt(Instant.now()) .jobId("spark-job-123") @@ -59,12 +58,14 @@ void completeOperation_writesHistoryFromOperationRow() { Optional result = service.completeOperation( - id, CompleteOperationRequest.builder().status(HistoryStatus.SUCCESS).build()); + CompleteOperationRequest.builder() + .operationId(operationId) + .status(HistoryStatus.SUCCESS) + .build()); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); - assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); assertThat(result.get().getCompletedAt()).isNotNull(); @@ -74,11 +75,9 @@ void completeOperation_writesHistoryFromOperationRow() { void completeOperation_notFound_returnsEmpty() { Optional result = service.completeOperation( - UUID.randomUUID().toString(), CompleteOperationRequest.builder() + .operationId(UUID.randomUUID().toString()) .status(HistoryStatus.FAILED) - .result( - 
JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) .build()); assertThat(result).isEmpty(); @@ -141,16 +140,54 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { .stats(secondStats) .build()); - // Current row reflects the latest upsert + // Current row reflects the latest upsert's snapshot. assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); - // History has one row per upsert with the raw delta from each call + // History has one row per upsert with the raw delta from each call. List history = statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(history).hasSize(2); - // Newest first - assertThat(history.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); - assertThat(history.get(1).getStats().getDelta().getNumFilesAdded()).isEqualTo(5L); + // Newest first. + assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); + } + + // --- list filters touch the operations enum mapping path --- + + @Test + void listTableOperations_filtersByOperationTypeAndStatus() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(pendingId) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + operationsRepository.save( + TableOperationsRow.builder() + .id(scheduledId) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + 
.status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + assertThat( + service.listTableOperations( + Optional.of(OperationType.ORPHAN_FILES_DELETION), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + Optional.empty(), + Optional.empty())) + .extracting(dto -> dto.getId()) + .containsExactly(pendingId); } } From 861b584c3cd41ff03db336c85cb0cde4bc063fe4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:55:57 -0700 Subject: [PATCH 43/81] feat(optimizer): extend model layer for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare model/ for a service-layer rewrite that returns only model/ types (no api/ DTO leakage into the service interface). - model/Table: add `Instant updatedAt`. The service stamps it on every upsert; controllers read it when assembling the wire DTO. - model/TableStatsHistory: new internal-model counterpart to db.TableStatsHistoryRow. Fields mirror the row in internal types (id, tableUuid, databaseName, tableName, stats, recordedAt). - ApiModelMapper: add the missing api↔model conversions that controllers will own once the service drops api/ knowledge — Table ↔ TableStatsDto, TableStatsHistory ↔ TableStatsHistoryDto, and toTable(tableUuid, UpsertTableStatsRequest). 
--- .../openhouse/optimizer/model/Table.java | 4 ++ .../optimizer/model/TableStatsHistory.java | 33 +++++++++++ .../model/mapper/ApiModelMapper.java | 58 +++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index c8bede225..dc0a16a0c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import java.time.Instant; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -28,4 +29,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; + + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java new file mode 100644 index 000000000..5cdad1918 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of an append-only per-commit stats history record. + * + *

<p>One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit + * delta — consumers can reconstruct change rates over arbitrary time windows. + * + *

Pure internal-model type — no references to wire-API or DB types. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistory { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + + /** Snapshot + delta for this commit event. */ + private TableStats stats; + + /** When this history row was recorded. */ + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 35af7fb25..d77b3a253 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -2,12 +2,18 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import java.util.Collections; import org.springframework.stereotype.Component; /** @@ -89,6 +95,58 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .build(); } + // --- Table <-> TableStatsDto / 
UpsertTableStatsRequest --- + + /** + * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} + * is intentionally left null — the service stamps it server-side at write time. + */ + public Table toTable(String tableUuid, UpsertTableStatsRequest request) { + if (request == null) { + return null; + } + return Table.builder() + .tableUuid(tableUuid) + .databaseName(request.getDatabaseName()) + .tableId(request.getTableName()) + .tableProperties( + request.getTableProperties() != null + ? request.getTableProperties() + : Collections.emptyMap()) + .stats(toModelStats(request.getStats())) + .build(); + } + + public TableStatsDto toDto(Table table) { + if (table == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .stats(toApiStats(table.getStats())) + .tableProperties(table.getTableProperties()) + .updatedAt(table.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistory <-> TableStatsHistoryDto --- + + public TableStatsHistoryDto toDto(TableStatsHistory history) { + if (history == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .stats(toApiStats(history.getStats())) + .recordedAt(history.getRecordedAt()) + .build(); + } + // --- TableStats payload --- public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { From b60a3bfc1d51a6f60ac42baba3669bf90e71683f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:00:13 -0700 Subject: [PATCH 44/81] feat(optimizer): extend ModelDbMapper for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round out the model↔db boundary for the upcoming service-layer rewrite that returns only internal-model 
types: - toTable: stamp model.Table.updatedAt from the row's updated_at column so the model carries the freshness needed by callers without leaking the row. - toStatsHistory: new — db.TableStatsHistoryRow → model.TableStatsHistory. Joins the row's snapshot + delta columns into the model's single TableStats payload. --- .../optimizer/model/mapper/ModelDbMapper.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 755b38400..7a454c78c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -13,6 +13,7 @@ import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.util.Collections; import org.springframework.stereotype.Component; @@ -111,6 +112,23 @@ public Table toTable(TableStatsRow row) { row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) // table_stats holds only the snapshot — deltas live on the history table. 
.stats(joinStats(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistoryRow -> TableStatsHistory --- + + public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(joinStats(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) .build(); } From b80b2e503f97d318675f2b39e387e1245e28db1e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:03:13 -0700 Subject: [PATCH 45/81] refactor(optimizer): service layer returns only model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Push the api/model boundary out of the service entirely. After this commit, calling into OptimizerDataService never returns or accepts a wire DTO; controllers (or any future CLI / in-process consumer) own the marshalling at their own edge. Service interface: - All return types and parameters are model/ types or primitives. - completeOperation(String operationId, model.HistoryStatus status). - upsertTableStats(model.Table table) — caller supplies a Table; the service stamps Table.updatedAt and returns the updated Table. - listTableOperations / getStatsHistory / etc. return Lists of model types. Service impl: - Drop ApiModelMapper injection. Only depends on ModelDbMapper. - All conversions are db row → ModelDbMapper → model. The new toStatsHistory mapper method (landed on optimizer-1) handles the history-row case. The updated toTable now stamps Table.updatedAt from the row. Controllers (api/controller/*): - TableOperationsController, TableOperationsHistoryController, TableStatsController now inject ApiModelMapper and do api↔model conversion at the boundary. 
Each controller method takes api request types, converts to model, calls the service, converts the returned model back to api DTOs. - TableOperationsController.complete continues to take the operationId from the request body. Test: - OptimizerDataServiceImplTest now exercises the service in model types: builders create model.Table, assertions read model.HistoryStatus / model.OperationType / model.TableStats etc. Verification: `git grep "import com.linkedin.openhouse.optimizer.api" -- services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/` returns empty. --- .../controller/TableOperationsController.java | 28 ++-- .../TableOperationsHistoryController.java | 12 +- .../api/controller/TableStatsController.java | 28 +++- .../service/OptimizerDataService.java | 51 +++--- .../service/OptimizerDataServiceImpl.java | 152 +++++++----------- .../service/OptimizerDataServiceImplTest.java | 127 +++++++-------- 6 files changed, 195 insertions(+), 203 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index e48043a35..2c2483c1b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -5,9 +5,11 @@ import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import 
org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -26,6 +28,7 @@ public class TableOperationsController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** * Report that an operation has completed. The body carries the {@code operationId} the caller is @@ -37,8 +40,9 @@ public class TableOperationsController { public ResponseEntity completeOperation( @RequestBody CompleteOperationRequest request) { return service - .completeOperation(request) - .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto)) + .completeOperation( + request.getOperationId(), apiMapper.toModelHistoryStatus(request.getStatus())) + .map(history -> ResponseEntity.status(HttpStatus.CREATED).body(apiMapper.toDto(history))) .orElse(ResponseEntity.notFound().build()); } @@ -47,6 +51,7 @@ public ResponseEntity completeOperation( public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) + .map(apiMapper::toDto) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -62,12 +67,17 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok( - service.listTableOperations( - Optional.ofNullable(operationType), - Optional.ofNullable(status), - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid))); + List result = + service + .listTableOperations( + Optional.ofNullable(operationType).map(apiMapper::toModelOperationType), + Optional.ofNullable(status).map(apiMapper::toModelOperationStatus), + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid)) + .stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 17dc0670a..df7cabeff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,8 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -21,18 +23,24 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistoryDto dto) { - return ResponseEntity.status(HttpStatus.CREATED).body(service.appendHistory(dto)); + return ResponseEntity.status(HttpStatus.CREATED) + .body(apiMapper.toDto(service.appendHistory(apiMapper.toHistory(dto)))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. 
*/ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getHistory(tableUuid, limit)); + List result = + service.getHistory(tableUuid, limit).stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index ef57598e8..2b738a6c3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -3,10 +3,12 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -24,6 +26,7 @@ public class TableStatsController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** * Create or overwrite the stats row for {@code tableUuid}. 
Called by the Tables Service on every @@ -32,7 +35,8 @@ public class TableStatsController { @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { - return ResponseEntity.ok(service.upsertTableStats(tableUuid, request)); + return ResponseEntity.ok( + apiMapper.toDto(service.upsertTableStats(apiMapper.toTable(tableUuid, request)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ @@ -40,6 +44,7 @@ public ResponseEntity upsertTableStats( public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) + .map(apiMapper::toDto) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -53,11 +58,16 @@ public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok( - service.listTableStats( - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid))); + List result = + service + .listTableStats( + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid)) + .stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } /** @@ -69,6 +79,10 @@ public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit)); + List result = + service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index c3988f668..e8a4da86e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,18 +1,23 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import java.util.List; import java.util.Optional; -/** Service interface for optimizer data operations. */ +/** + * Service interface for optimizer data operations. + * + *

The service is the boundary between the wire-API surface and the database. Inputs and outputs + * are internal-model types only — callers (controllers, future CLI, in-process consumers) + * convert at their own edge. No api/-package types appear here. + */ public interface OptimizerDataService { // --- TableOperations --- @@ -21,7 +26,7 @@ public interface OptimizerDataService { * List operations matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableOperations( + List listTableOperations( Optional operationType, Optional status, Optional databaseName, @@ -30,33 +35,35 @@ List listTableOperations( /** * Complete an operation by writing a history entry. Looks up the operation row by {@code - * request.operationId}, copies its table metadata into a new history row, and saves it. Returns - * the history DTO, or empty if the operation does not exist. + * operationId}, copies its table metadata into a new history row with the supplied terminal + * {@code status}, and saves it. Returns the history record, or empty if the operation does not + * exist. */ - Optional completeOperation(CompleteOperationRequest request); + Optional completeOperation(String operationId, HistoryStatus status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes). */ - Optional getTableOperation(String id); + Optional getTableOperation(String id); // --- TableStats --- /** - * Create or update the stats row for {@code tableUuid}. Fully idempotent: the same call - * overwrites the previous snapshot with the latest commit values. + * Create or update the stats row for {@code table.getTableUuid()}. Fully idempotent: the same + * call overwrites the previous snapshot with the latest commit values. 
The service stamps {@link + * Table#getUpdatedAt()} server-side and returns the resulting {@link Table}. */ - TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request); + Table upsertTableStats(Table table); /** Return the stats row for {@code tableUuid}, or empty if none exists. */ - Optional getTableStats(String tableUuid); + Optional getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableStats( + List
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** @@ -66,12 +73,12 @@ List listTableStats( * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Optional since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); // --- TableOperationsHistory --- /** Append a completed-job result record. */ - TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto); + TableOperationsHistory appendHistory(TableOperationsHistory history); /** * Return the most recent history rows for a table UUID, newest first. @@ -79,5 +86,5 @@ List listTableStats( * @param tableUuid the stable table UUID * @param limit maximum number of rows to return */ - List getHistory(String tableUuid, int limit); + List getHistory(String tableUuid, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 21802a84f..47143118c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,18 +1,15 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import 
com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; @@ -28,7 +25,12 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; -/** Implementation of {@link OptimizerDataService}. */ +/** + * Implementation of {@link OptimizerDataService}. + * + *

Operates purely on model/ and db/ types. The model↔db boundary is the {@link ModelDbMapper}. + * No api/-package types appear in this class. + */ @Service @RequiredArgsConstructor public class OptimizerDataServiceImpl implements OptimizerDataService { @@ -37,13 +39,12 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final ApiModelMapper apiMapper; private final ModelDbMapper dbMapper; // --- TableOperations --- @Override - public List listTableOperations( + public List listTableOperations( Optional operationType, Optional status, Optional databaseName, @@ -51,26 +52,22 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType - .map(t -> dbMapper.toDbOperationType(apiMapper.toModelOperationType(t))) - .orElse(null), - status - .map(s -> dbMapper.toDbOperationStatus(apiMapper.toModelOperationStatus(s))) - .orElse(null), + operationType.map(dbMapper::toDbOperationType).orElse(null), + status.map(dbMapper::toDbOperationStatus).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() .map(dbMapper::toOperation) - .map(apiMapper::toDto) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation(CompleteOperationRequest request) { + public Optional completeOperation( + String operationId, HistoryStatus status) { return operationsRepository - .findById(request.getOperationId()) + .findById(operationId) .map( row -> { TableOperationsHistoryRow historyRow = @@ -81,27 +78,24 @@ public Optional completeOperation(CompleteOperationRe .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status( - dbMapper.toDbHistoryStatus( - apiMapper.toModelHistoryStatus(request.getStatus()))) + 
.status(dbMapper.toDbHistoryStatus(status)) .build(); - return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(historyRow))); + return dbMapper.toHistory(historyRepository.save(historyRow)); }); } @Override - public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(dbMapper::toOperation).map(apiMapper::toDto); + public Optional getTableOperation(String id) { + return operationsRepository.findById(id).map(dbMapper::toOperation); } // --- TableStats --- @Override @Transactional - public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) { + public Table upsertTableStats(Table table) { Instant now = Instant.now(); - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - apiMapper.toModelStats(request.getStats()); + String tableUuid = table.getTableUuid(); TableStatsRow row = statsRepository @@ -110,19 +104,19 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest existing -> existing .toBuilder() - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .tableProperties(request.getTableProperties()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .tableProperties(table.getTableProperties()) .updatedAt(now) .build()) .orElse( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .tableProperties(request.getTableProperties()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .tableProperties(table.getTableProperties()) .updatedAt(now) .build()); TableStatsRow saved = statsRepository.save(row); @@ -131,36 +125,36 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest 
TableStatsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .delta(dbMapper.toDbDelta(modelStats)) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .delta(dbMapper.toDbDelta(table.getStats())) .recordedAt(now) .build()); - return toTableStatsDto(saved); + return dbMapper.toTable(saved); } @Override - public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(this::toTableStatsDto); + public Optional

<Table> getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(dbMapper::toTable); } @Override - public List<TableStatsDto> listTableStats( + public List<Table>
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(this::toTableStatsDto) + .map(dbMapper::toTable) .collect(Collectors.toList()); } @Override - public List getStatsHistory( + public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(this::toTableStatsHistoryDto) + .map(dbMapper::toStatsHistory) .collect(Collectors.toList()); } @@ -168,62 +162,26 @@ public List getStatsHistory( @Override @Transactional - public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { + public TableOperationsHistory appendHistory(TableOperationsHistory history) { TableOperationsHistoryRow row = TableOperationsHistoryRow.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType( - dbMapper.toDbOperationType(apiMapper.toModelOperationType(dto.getOperationType()))) - .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dbMapper.toDbHistoryStatus(apiMapper.toModelHistoryStatus(dto.getStatus()))) + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(dbMapper.toDbOperationType(history.getOperationType())) + .completedAt( + history.getCompletedAt() != null ? 
history.getCompletedAt() : Instant.now()) + .status(dbMapper.toDbHistoryStatus(history.getStatus())) .build(); - return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(row))); + return dbMapper.toHistory(historyRepository.save(row)); } @Override - public List getHistory(String tableUuid, int limit) { + public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() .map(dbMapper::toHistory) - .map(apiMapper::toDto) .collect(Collectors.toList()); } - - // --- private helpers --- - - /** - * Assemble a wire {@link TableStatsDto} from a {@link TableStatsRow}. The current-state row holds - * only the snapshot — deltas live exclusively on history rows. - */ - private TableStatsDto toTableStatsDto(TableStatsRow row) { - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - dbMapper.joinStats(row.getSnapshot(), null); - TableStats apiStats = apiMapper.toApiStats(modelStats); - return TableStatsDto.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(apiStats) - .tableProperties(row.getTableProperties()) - .updatedAt(row.getUpdatedAt()) - .build(); - } - - /** Assemble a wire {@link TableStatsHistoryDto} from a {@link TableStatsHistoryRow}. 
*/ - private TableStatsHistoryDto toTableStatsHistoryDto(TableStatsHistoryRow row) { - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - dbMapper.joinStats(row.getSnapshot(), row.getDelta()); - TableStats apiStats = apiMapper.toApiStats(modelStats); - return TableStatsHistoryDto.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(apiStats) - .recordedAt(row.getRecordedAt()) - .build(); - } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 29374cbfc..9d653e21d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -2,16 +2,14 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; 
+import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -56,12 +54,8 @@ void completeOperation_writesHistoryFromOperationRow() { .jobId("spark-job-123") .build()); - Optional result = - service.completeOperation( - CompleteOperationRequest.builder() - .operationId(operationId) - .status(HistoryStatus.SUCCESS) - .build()); + Optional result = + service.completeOperation(operationId, HistoryStatus.SUCCESS); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); @@ -73,12 +67,8 @@ void completeOperation_writesHistoryFromOperationRow() { @Test void completeOperation_notFound_returnsEmpty() { - Optional result = - service.completeOperation( - CompleteOperationRequest.builder() - .operationId(UUID.randomUUID().toString()) - .status(HistoryStatus.FAILED) - .build()); + Optional result = + service.completeOperation(UUID.randomUUID().toString(), HistoryStatus.FAILED); assertThat(result).isEmpty(); } @@ -88,67 +78,72 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - TableStats stats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + Table input = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .build()) .build(); - TableStatsDto dto = - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - 
.stats(stats) - .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .build()); - - assertThat(dto.getTableUuid()).isEqualTo(tableUuid); - assertThat(dto.getDatabaseName()).isEqualTo("db1"); - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); - assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); + Table result = service.upsertTableStats(input); + + assertThat(result.getTableUuid()).isEqualTo(tableUuid); + assertThat(result.getDatabaseName()).isEqualTo("db1"); + assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(result.getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + assertThat(result.getUpdatedAt()).isNotNull(); assertThat(statsRepository.findById(tableUuid)).isPresent(); } @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - TableStats firstStats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) + Table first = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(5L) + .numFilesDeleted(1L) + .build()) + .build()) .build(); - TableStats secondStats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) + Table second = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta( + 
TableStats.CommitDelta.builder() + .numFilesAdded(3L) + .numFilesDeleted(0L) + .build()) + .build()) .build(); - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - .stats(firstStats) - .build()); - TableStatsDto dto = - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - .stats(secondStats) - .build()); - - // Current row reflects the latest upsert's snapshot. - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + service.upsertTableStats(first); + Table result = service.upsertTableStats(second); + + assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); - // History has one row per upsert with the raw delta from each call. List history = statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(history).hasSize(2); - // Newest first. assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); } @@ -187,7 +182,7 @@ void listTableOperations_filtersByOperationTypeAndStatus() { Optional.empty(), Optional.empty(), Optional.empty())) - .extracting(dto -> dto.getId()) + .extracting(op -> op.getId()) .containsExactly(pendingId); } } From 25d98aaacc7ffd4c506b1f43a896210725b83f9a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:10:16 -0700 Subject: [PATCH 46/81] feat(optimizer): restore batch CAS methods on TableOperationsRepository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimizer-1 db/ rewrite accidentally dropped the batch CAS helpers used by the scheduler. Restore them with db/-typed enum parameters and JPQL queries that compare against fully-qualified db.OperationStatus constants. - markSchedulingBatch(ids, scheduledAt): PENDING → SCHEDULING. 
- markScheduledBatch(ids, jobId): SCHEDULING → SCHEDULED. - markPendingBatch(ids): SCHEDULING → PENDING (job-launch failure retry). - cancelDuplicatePendingBatch(operationType, keepIds): drop dupe PENDING rows for an operation type, keeping the supplied IDs. --- .../repository/TableOperationsRepository.java | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index e9bc1c8b3..962a108a2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -3,8 +3,10 @@ import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import java.time.Instant; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -28,4 +30,59 @@ List find( @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); + + /** + * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of + * rows transitioned. Rows already claimed by another instance are skipped silently; callers must + * re-query if they need the precise list. 
+ */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," + + " r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int markSchedulingBatch( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + + /** + * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in + * SCHEDULING. Returns the number of rows transitioned. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," + + " r.jobId = :jobId, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + + /** + * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the + * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns + * the number of rows reverted. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," + + " r.scheduledAt = NULL, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markPendingBatch(@Param("ids") List ids); + + /** + * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in + * {@code keepIds}. Used by the scheduler to deduplicate before claiming. 
+ */ + @Modifying + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.operationType = :operationType " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " + + "AND r.id NOT IN :keepIds") + int cancelDuplicatePendingBatch( + @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); } From 188713d7479b0d1c0425b9e753e0da2df25915b4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:20:50 -0700 Subject: [PATCH 47/81] docs(optimizer): comment every field on opt-0 api/ and model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several fields under api/model/ and model/ were left undocumented in the earlier per-layer-types passes. Audit + fill them in: api/model/TableOperationsHistoryDto: databaseName, tableName, operationType — add display/role docs. api/model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. api/model/TableStats inner classes: - SnapshotMetrics: clusterId, tableVersion, tableLocation, tableSizeBytes — add field docs. - CommitDelta: numFilesAdded, numFilesDeleted, addedSizeBytes, deletedSizeBytes — add field docs. model/Table: tableUuid, databaseName, tableId, tableProperties, stats — add field docs. model/TableStats: same field-doc additions on SnapshotMetrics and CommitDelta as the api/ counterpart. model/OperationStatus: PENDING, SCHEDULING, SCHEDULED, CANCELED — add enum-value docs. model/OperationType: ORPHAN_FILES_DELETION — add enum-value doc. model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. model/TableStatsHistory: id, tableUuid, databaseName, tableName — add field docs. 
--- .../optimizer/api/model/HistoryStatus.java | 4 ++++ .../api/model/TableOperationsHistoryDto.java | 5 +++++ .../optimizer/api/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/HistoryStatus.java | 4 ++++ .../optimizer/model/OperationStatus.java | 8 ++++++++ .../optimizer/model/OperationType.java | 2 ++ .../openhouse/optimizer/model/Table.java | 7 +++++++ .../openhouse/optimizer/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/TableStatsHistory.java | 7 +++++++ 9 files changed, 71 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 2fbcf6235..dc52f863e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -2,6 +2,10 @@ /** Terminal states for a completed Spark maintenance job. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index d9fa1f387..4e247c7ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -19,8 +19,13 @@ public class TableOperationsHistoryDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. 
*/ private String tableName; + + /** The type of maintenance operation this history row records. */ private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index de268ffe7..dcb360330 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -32,10 +32,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -47,9 +56,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index d29c88719..97b8e2992 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -8,6 +8,10 @@ *

<p>Intentionally separate from the wire-API and DB representations. */ public enum HistoryStatus { + + /** The operation completed successfully. */ SUCCESS, + + /** The operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index 66f213c73..f284fedaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -8,8 +8,16 @@ *

<p>Intentionally separate from the wire-API and DB representations. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index bea44018b..8f4fe35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -6,5 +6,7 @@ * supported operations without churning either boundary. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index dc0a16a0c..bca7e2420 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -22,12 +22,19 @@ @AllArgsConstructor public class Table { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; + + /** Database the table lives in. */ private String databaseName; + + /** Iceberg table identifier (table name, not UUID). */ private String tableId; + /** Current table-property map (e.g. maintenance opt-in flags). Never null. 
*/ @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ private TableStats stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 3b56196ea..94d0a1655 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -27,10 +27,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -42,9 +51,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 5cdad1918..53bb54d1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -20,9 +20,16 @@ @AllArgsConstructor public class TableStatsHistory { + /** UUID primary key — set by the caller, not generated server-side. */ private String id; + + /** Stable table identity from the Tables Service. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; /** Snapshot + delta for this commit event. */ From 8d642732244b002f1f7926ae81e98b27f95b1881 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:28:57 -0700 Subject: [PATCH 48/81] refactor(optimizer): remove clusterId from SnapshotMetrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clusterId is per-table-immutable in OpenHouse — it never changes after the table is created — so persisting and transmitting it on every snapshot is dead weight. Remove from the wire and internal representations. - api/model/TableStats.SnapshotMetrics: drop clusterId. - model/TableStats.SnapshotMetrics: drop clusterId. - model/mapper/ApiModelMapper: drop the clusterId hop in toModelSnapshot and toApiSnapshot. 
--- .../com/linkedin/openhouse/optimizer/api/model/TableStats.java | 3 --- .../com/linkedin/openhouse/optimizer/model/TableStats.java | 3 --- .../openhouse/optimizer/model/mapper/ApiModelMapper.java | 2 -- 3 files changed, 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index dcb360330..096eecd1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -33,9 +33,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 94d0a1655..56291e510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -28,9 +28,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. 
*/ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index d77b3a253..31141ff44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -215,7 +215,6 @@ private TableStats.SnapshotMetrics toModelSnapshot( return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(apiValue.getClusterId()) .tableVersion(apiValue.getTableVersion()) .tableLocation(apiValue.getTableLocation()) .tableSizeBytes(apiValue.getTableSizeBytes()) @@ -229,7 +228,6 @@ private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics to return null; } return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .clusterId(modelValue.getClusterId()) .tableVersion(modelValue.getTableVersion()) .tableLocation(modelValue.getTableLocation()) .tableSizeBytes(modelValue.getTableSizeBytes()) From c1ad24615aaae6dd5c5012f2fdd591f8c0c01712 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:34:17 -0700 Subject: [PATCH 49/81] refactor(optimizer): comment every db/ field; drop clusterId and version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cleanups on the DB layer, plus a doc audit. clusterId removal: - db/SnapshotMetrics: drop clusterId. - model/mapper/ModelDbMapper: drop clusterId from toModelSnapshot and toDbSnapshot. - Repository tests: drop .clusterId("cl1") from builders. (The api/ and model/ copies were retired in the prior optimizer-0 commit; this completes the removal at the db edge.) version removal: - db/TableOperationsRow: drop the `version` field. 
The batch CAS pattern's atomicity comes from filtering on `status` (PENDING → SCHEDULING is unambiguous on status alone); the version bump was decorative. - table_operations schema: drop the `version BIGINT` column. - TableOperationsRepository: remove `r.version = r.version + 1` from markSchedulingBatch / markScheduledBatch / markPendingBatch query strings. - model/mapper/ModelDbMapper.toRow: stop initializing version on the row builder. Doc audit on db/: - db/SnapshotMetrics, db/CommitDeltaMetrics: doc every field. - db/HistoryStatus, db/OperationStatus, db/OperationType: doc every enum value. - db/TableOperationsRow, db/TableOperationsHistoryRow, db/TableStatsRow, db/TableStatsHistoryRow: doc every field. --- .../optimizer/db/CommitDeltaMetrics.java | 7 +++++++ .../openhouse/optimizer/db/HistoryStatus.java | 4 ++++ .../optimizer/db/OperationStatus.java | 8 ++++++++ .../openhouse/optimizer/db/OperationType.java | 2 ++ .../optimizer/db/SnapshotMetrics.java | 6 +++++- .../db/TableOperationsHistoryRow.java | 6 ++++++ .../optimizer/db/TableOperationsRow.java | 18 ++++++++---------- .../optimizer/db/TableStatsHistoryRow.java | 7 +++++++ .../openhouse/optimizer/db/TableStatsRow.java | 5 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 3 --- .../repository/TableOperationsRepository.java | 6 +++--- .../src/main/resources/db/optimizer-schema.sql | 1 - .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 3 +-- 14 files changed, 57 insertions(+), 21 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java index 8094d28b8..5a30c9afd 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -14,8 +14,15 @@ 
@JsonIgnoreProperties(ignoreUnknown = true) public class CommitDeltaMetrics { + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. */ private Long deletedSizeBytes; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java index 94e573968..3680735f4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -6,6 +6,10 @@ *

<p>Self-contained: no references to api/ or model/ types. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java index 4e9161693..0a2e07483 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -6,8 +6,16 @@ *

<p>Self-contained: no references to api/ or model/ types. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java index 3a896e415..e4caf549b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -8,5 +8,7 @@ * {@code @Enumerated(EnumType.STRING)}. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java index 22d222172..452b35097 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -14,9 +14,13 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class SnapshotMetrics { - private String clusterId; + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; /** Total number of data files as of the latest snapshot — used for bin-packing. 
*/ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java index 2e1230181..5f4a598d9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -47,22 +47,28 @@ public class TableOperationsHistoryRow { @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this history row records. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. 
*/ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private HistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java index 9652214d3..dfe40d402 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -44,44 +44,42 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableOperationsRow { + /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this row recommends. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** Lifecycle state — drives the scheduler's CAS claim and the analyzer's eligibility check. */ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private OperationStatus status; + /** When the analyzer first created this row. Set on insert; never updated. */ @Column(name = "created_at", nullable = false) private Instant createdAt; + /** When the scheduler last submitted a job for this row. {@code null} while {@code PENDING}. 
*/ @Column(name = "scheduled_at") private Instant scheduledAt; /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ @Column(name = "job_id", length = 255) private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, - * ensuring two scheduler instances can't both move the same row out of PENDING. Not managed by - * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works - * portably across MySQL and H2. - */ - @Column(name = "version") - private Long version; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 71c17b582..4eaee2a6f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -41,27 +41,34 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsHistoryRow { + /** UUID primary key — set by the caller, not generated server-side. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable Iceberg table UUID. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Snapshot fields at commit time. Stored as a JSON blob in the {@code snapshot} column. 
*/ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Per-commit delta counters. Stored as a JSON blob in the {@code delta} column. */ @Type(type = "json") @Column(name = "delta", columnDefinition = "TEXT") private CommitDeltaMetrics delta; + /** When this history row was recorded (commit time). */ @Column(name = "recorded_at", nullable = false) private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 8d869ff1e..165247b6a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -35,20 +35,25 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { + /** Stable Iceberg table UUID. Primary key. */ @Id @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Latest snapshot fields. Stored as a JSON blob in the {@code snapshot} column. */ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Current table-property map (e.g. maintenance opt-in flags). Stored as JSON. 
*/ @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 7a454c78c..59d7e8680 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -62,7 +62,6 @@ public TableOperationsRow toRow(TableOperation op) { .status(toDbOperationStatus(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) - .version(0L) .build(); } @@ -208,7 +207,6 @@ private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) @@ -221,7 +219,6 @@ private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { return null; } return SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 962a108a2..8baddfe42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -40,7 +40,7 @@ List find( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " 
r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + " r.scheduledAt = :scheduledAt " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") int markSchedulingBatch( @@ -54,7 +54,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId, r.version = r.version + 1 " + + " r.jobId = :jobId " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); @@ -68,7 +68,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL, r.version = r.version + 1 " + + " r.scheduledAt = NULL " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markPendingBatch(@Param("ids") List ids); diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 24b367549..892c1c55f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -10,7 +10,6 @@ CREATE TABLE IF NOT EXISTS table_operations ( created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), - version BIGINT, -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index dbd8cc686..536b72e35 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -132,7 +132,7 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .delta( CommitDeltaMetrics.builder() .numFilesAdded(numFilesAdded) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 493eb88b6..f9cc28d57 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -24,8 +24,7 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - SnapshotMetrics snapshot = - SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + SnapshotMetrics snapshot = SnapshotMetrics.builder().tableSizeBytes(1024L).build(); repository.save( TableStatsRow.builder() From c72aae8ed9e324591b88cf54f993400370f087b3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:58:31 -0700 Subject: [PATCH 50/81] =?UTF-8?q?refactor(optimizer):=20move=20api?= 
=?UTF-8?q?=E2=86=94model=20conversion=20onto=20api=20types;=20delete=20Ap?= =?UTF-8?q?iModelMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the api/model boundary mapper with conversion methods on the types themselves. The api layer now imports model/ directly via to/from methods — controllers and other api-edge callers no longer inject a mapper bean. The dependency direction is a strict downward chain: api → model → db api types know about model types (and call model methods); model types know about db types (next round). db remains import-free. No central mapper, no risk of a cycle through a hub class. api/model/* changes (each gets a `toModel()` instance method + a static `fromModel(...)` factory): - TableOperationsDto ↔ model.TableOperation. - TableOperationsHistoryDto ↔ model.TableOperationsHistory. - TableStatsDto ↔ model.Table. - TableStatsHistoryDto ↔ model.TableStatsHistory. - UpsertTableStatsRequest → model.Table (one-way; takes the path-var tableUuid; updatedAt is server-stamped). - TableStats (+ SnapshotMetrics + CommitDelta inner) ↔ model.TableStats. - OperationType / OperationStatus / HistoryStatus (api enums) ↔ model enums. CompleteOperationRequest keeps its fields plain — callers extract `operationId` and `status.toModel()` directly; no wrapper needed. Delete services/optimizer/.../model/mapper/ApiModelMapper.java. 
--- .../optimizer/api/model/HistoryStatus.java | 12 +- .../optimizer/api/model/OperationStatus.java | 13 +- .../optimizer/api/model/OperationType.java | 12 +- .../api/model/TableOperationsDto.java | 32 +++ .../api/model/TableOperationsHistoryDto.java | 30 ++ .../optimizer/api/model/TableStats.java | 67 +++++ .../optimizer/api/model/TableStatsDto.java | 29 ++ .../api/model/TableStatsHistoryDto.java | 28 ++ .../api/model/UpsertTableStatsRequest.java | 17 ++ .../model/mapper/ApiModelMapper.java | 263 ------------------ 10 files changed, 237 insertions(+), 266 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index dc52f863e..0c9ff95da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -7,5 +7,15 @@ public enum HistoryStatus { SUCCESS, /** The Spark job for this operation failed. */ - FAILED + FAILED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java index c97be441b..300c28263 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -17,5 +17,16 @@ public enum OperationStatus { * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED * before the claim step. */ - CANCELED + CANCELED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 8507bae12..5f325e712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -3,5 +3,15 @@ /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.OperationType toModel() { + return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index d41bd6906..db8ef1039 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -39,4 +40,35 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; + + /** Convert to the internal-model counterpart. */ + public TableOperation toModel() { + return TableOperation.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .status(status == null ? null : status.toModel()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsDto fromModel(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 4e247c7ce..935435040 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -33,4 +34,33 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; + + /** Convert to the internal-model counterpart. */ + public TableOperationsHistory toModel() { + return TableOperationsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .completedAt(completedAt) + .status(status == null ? null : status.toModel()) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + if (h == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .operationType(OperationType.fromModel(h.getOperationType())) + .completedAt(h.getCompletedAt()) + .status(HistoryStatus.fromModel(h.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 096eecd1e..c75d21d75 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -25,6 +25,25 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.builder() + .snapshot(snapshot == null ? null : snapshot.toModel()) + .delta(delta == null ? null : delta.toModel()) + .build(); + } + + /** Build the api-layer payload from the internal-model counterpart. */ + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(CommitDelta.fromModel(m.getDelta())) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -44,6 +63,30 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. */ + public static SnapshotMetrics fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + if (m == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(m.getTableVersion()) + .tableLocation(m.getTableLocation()) + .tableSizeBytes(m.getTableSizeBytes()) + .numCurrentFiles(m.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -65,5 +108,29 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. 
*/ + public static CommitDelta fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + if (m == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(m.getNumFilesAdded()) + .numFilesDeleted(m.getNumFilesDeleted()) + .addedSizeBytes(m.getAddedSizeBytes()) + .deletedSizeBytes(m.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..82dc552c2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -31,4 +33,31 @@ public class TableStatsDto { /** When this row was last written. Used for staleness monitoring. */ private Instant updatedAt; + + /** Convert to the internal-model counterpart. */ + public Table toModel() { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? null : stats.toModel()) + .updatedAt(updatedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsDto fromModel(Table t) { + if (t == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(t.getTableUuid()) + .databaseName(t.getDatabaseName()) + .tableName(t.getTableId()) + .stats(TableStats.fromModel(t.getStats())) + .tableProperties(t.getTableProperties()) + .updatedAt(t.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..b5f971bbf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -30,4 +31,31 @@ public class TableStatsHistoryDto { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the internal-model counterpart. */ + public TableStatsHistory toModel() { + return TableStatsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(stats == null ? null : stats.toModel()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + if (h == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .stats(TableStats.fromModel(h.getStats())) + .recordedAt(h.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..13476543f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -29,4 +31,19 @@ public class UpsertTableStatsRequest { /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** + * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from + * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service + * stamps it server-side at write time. + */ + public Table toModel(String tableUuid) { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? 
null : stats.toModel()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java deleted file mode 100644 index 31141ff44..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ /dev/null @@ -1,263 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between wire-API DTOs and internal {@code model/} domain objects. - * - *

The only place inside {@code model/} where {@code api/} types are referenced — this is the - * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} - * stay free of any api-side imports. - * - *

API-layer enums + payloads are intentionally separate Java types from the internal-model - * counterparts; the two sides evolve independently. This mapper translates by name. - */ -@Component -public class ApiModelMapper { - - // --- TableOperationsDto <-> TableOperation --- - - public TableOperation toOperation(TableOperationsDto dto) { - if (dto == null) { - return null; - } - return TableOperation.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .status(toModelOperationStatus(dto.getStatus())) - .createdAt(dto.getCreatedAt()) - .scheduledAt(dto.getScheduledAt()) - .build(); - } - - public TableOperationsDto toDto(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsDto.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toApiOperationType(op.getOperationType())) - .status(toApiOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryDto <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { - if (dto == null) { - return null; - } - return TableOperationsHistory.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .completedAt(dto.getCompletedAt()) - .status(toModelHistoryStatus(dto.getStatus())) - .build(); - } - - public TableOperationsHistoryDto toDto(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - 
.tableName(history.getTableName()) - .operationType(toApiOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toApiHistoryStatus(history.getStatus())) - .build(); - } - - // --- Table <-> TableStatsDto / UpsertTableStatsRequest --- - - /** - * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} - * is intentionally left null — the service stamps it server-side at write time. - */ - public Table toTable(String tableUuid, UpsertTableStatsRequest request) { - if (request == null) { - return null; - } - return Table.builder() - .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableId(request.getTableName()) - .tableProperties( - request.getTableProperties() != null - ? request.getTableProperties() - : Collections.emptyMap()) - .stats(toModelStats(request.getStats())) - .build(); - } - - public TableStatsDto toDto(Table table) { - if (table == null) { - return null; - } - return TableStatsDto.builder() - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .stats(toApiStats(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(table.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistory <-> TableStatsHistoryDto --- - - public TableStatsHistoryDto toDto(TableStatsHistory history) { - if (history == null) { - return null; - } - return TableStatsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .stats(toApiStats(history.getStats())) - .recordedAt(history.getRecordedAt()) - .build(); - } - - // --- TableStats payload --- - - public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { - if (apiStats == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(apiStats.getSnapshot())) - 
.delta(toModelDelta(apiStats.getDelta())) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() - .snapshot(toApiSnapshot(modelStats.getSnapshot())) - .delta(toApiDelta(modelStats.getDelta())) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType( - com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { - return apiValue == null ? null : OperationType.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( - OperationType modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { - return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( - OperationStatus modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); - } - - public HistoryStatus toModelHistoryStatus( - com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { - return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( - HistoryStatus modelValue) { - return modelValue == null - ? 
null - : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); - } - - // --- TableStats inner classes --- - - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(apiValue.getTableVersion()) - .tableLocation(apiValue.getTableLocation()) - .tableSizeBytes(apiValue.getTableSizeBytes()) - .numCurrentFiles(apiValue.getNumCurrentFiles()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( - TableStats.SnapshotMetrics modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .tableVersion(modelValue.getTableVersion()) - .tableLocation(modelValue.getTableLocation()) - .tableSizeBytes(modelValue.getTableSizeBytes()) - .numCurrentFiles(modelValue.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(apiValue.getNumFilesAdded()) - .numFilesDeleted(apiValue.getNumFilesDeleted()) - .addedSizeBytes(apiValue.getAddedSizeBytes()) - .deletedSizeBytes(apiValue.getDeletedSizeBytes()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( - TableStats.CommitDelta modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() - .numFilesAdded(modelValue.getNumFilesAdded()) - .numFilesDeleted(modelValue.getNumFilesDeleted()) - .addedSizeBytes(modelValue.getAddedSizeBytes()) - .deletedSizeBytes(modelValue.getDeletedSizeBytes()) - .build(); - } -} From 
8ae8777422a940e3b730ede226f8801db5618619 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:02:28 -0700 Subject: [PATCH 51/81] =?UTF-8?q?refactor(optimizer):=20move=20model?= =?UTF-8?q?=E2=86=94db=20conversion=20onto=20model=20types;=20delete=20Mod?= =?UTF-8?q?elDbMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the model/db boundary mapper with conversion methods on the model types themselves. Same pattern that opt-0 just applied at the api↔model boundary — each layer's type carries the to/from methods for the layer below. The dependency chain after this commit: api → model → db api/* → model/* (added on opt-0). model/* → db/* (this commit). db/* still imports nothing — bottom of the chain. model/* changes (each gets a `toRow()` instance method + a static `fromRow(...)` factory): - Table ↔ db.TableStatsRow (current-state row; snapshot only, delta lives on history rows). - TableOperation ↔ db.TableOperationsRow. - TableOperationsHistory ↔ db.TableOperationsHistoryRow. - TableStatsHistory ↔ db.TableStatsHistoryRow (joins/splits the snapshot + delta columns). - TableStats inner: SnapshotMetrics ↔ db.SnapshotMetrics, CommitDelta ↔ db.CommitDeltaMetrics. TableStats itself exposes toSnapshotRow() / toDeltaRow() for the split-write side and a static fromRows(snapshot, delta) for the join-read side. - OperationType / OperationStatus / HistoryStatus (model enums) ↔ db enums. Delete services/optimizer/.../model/mapper/ModelDbMapper.java. 
--- .../optimizer/model/HistoryStatus.java | 12 +- .../optimizer/model/OperationStatus.java | 12 +- .../optimizer/model/OperationType.java | 12 +- .../openhouse/optimizer/model/Table.java | 37 ++- .../optimizer/model/TableOperation.java | 37 ++- .../model/TableOperationsHistory.java | 30 +++ .../openhouse/optimizer/model/TableStats.java | 71 +++++ .../optimizer/model/TableStatsHistory.java | 31 ++- .../optimizer/model/mapper/ModelDbMapper.java | 252 ------------------ 9 files changed, 232 insertions(+), 262 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index 97b8e2992..e6321873d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -13,5 +13,15 @@ public enum HistoryStatus { SUCCESS, /** The operation failed. */ - FAILED + FAILED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDb() { + return com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static HistoryStatus fromDb(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index f284fedaf..137d97902 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -19,5 +19,15 @@ public enum OperationStatus { SCHEDULED, /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ - CANCELED + CANCELED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationStatus toDb() { + return com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static OperationStatus fromDb(com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index 8f4fe35a8..13c7e9c61 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -8,5 +8,15 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationType toDb() { + return com.linkedin.openhouse.optimizer.db.OperationType.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. 
*/ + public static OperationType fromDb(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..659dd18da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -13,8 +14,8 @@ * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). * - *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link - * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. + *

Conversion methods cross into the DB layer one-way; the inverse lives on the api side. db/ + * types know nothing about model/ or api/. */ @Data @Builder @@ -39,4 +40,36 @@ public class Table { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + + /** + * Project to the current-state DB row. {@code table_stats} carries the snapshot only — per-commit + * deltas live on {@code table_stats_history} (see {@link TableStatsHistory#toRow()}). + */ + public TableStatsRow toRow() { + return TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableId) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .tableProperties(tableProperties) + .updatedAt(updatedAt) + .build(); + } + + /** Build a {@link Table} from a current-state DB row. */ + public static Table fromRow(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + // table_stats holds only the snapshot — deltas live on the history table. 
+ .stats(TableStats.fromRows(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..81f97f1de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -12,9 +13,8 @@ * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks * up and submits. * - *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction - * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or - * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). + *

Conversion methods cross into the DB layer one-way; the inverse lives on the api side. db/ + * types know nothing about model/ or api/. * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -75,4 +75,35 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } + + /** Convert to the corresponding DB row. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .status(status == null ? null : status.toDb()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a {@link TableOperation} from a DB row. */ + public static TableOperation fromRow(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .status(OperationStatus.fromDb(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..42a48479a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; 
import lombok.Builder; @@ -38,4 +39,33 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; + + /** Convert to the corresponding DB row. */ + public TableOperationsHistoryRow toRow() { + return TableOperationsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .completedAt(completedAt) + .status(status == null ? null : status.toDb()) + .build(); + } + + /** Build a {@link TableOperationsHistory} from a DB row. */ + public static TableOperationsHistory fromRow(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(HistoryStatus.fromDb(row.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..212390af9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -20,6 +20,31 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { + return snapshot == null ? null : snapshot.toDb(); + } + + /** + * Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics} object. 
+ */ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDeltaRow() { + return delta == null ? null : delta.toDb(); + } + + /** Join the two DB-side columns back into a single internal-model {@link TableStats}. */ + public static TableStats fromRows( + com.linkedin.openhouse.optimizer.db.SnapshotMetrics dbSnapshot, + com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromDb(dbSnapshot)) + .delta(CommitDelta.fromDb(dbDelta)) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -39,6 +64,29 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static SnapshotMetrics fromDb(com.linkedin.openhouse.optimizer.db.SnapshotMetrics v) { + if (v == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -60,5 +108,28 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the DB-layer counterpart. 
*/ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static CommitDelta fromDb(com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics v) { + if (v == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 53bb54d1e..f7f111151 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -11,8 +12,6 @@ * *

One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit * delta — consumers can reconstruct change rates over arbitrary time windows. - * - *

Pure internal-model type — no references to wire-API or DB types. */ @Data @Builder @@ -37,4 +36,32 @@ public class TableStatsHistory { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the corresponding DB row. */ + public TableStatsHistoryRow toRow() { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .delta(stats == null ? null : stats.toDeltaRow()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a {@link TableStatsHistory} from a DB row. */ + public static TableStatsHistory fromRow(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(TableStats.fromRows(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java deleted file mode 100644 index 59d7e8680..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ /dev/null @@ -1,252 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; -import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; -import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableOperationsRow; -import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between internal {@code model/} domain objects and database row entities. - * - *

The only place inside {@code model/} where {@code db/} types are referenced — this is the - * boundary at which the internal model meets the database layer. Pure data types under {@code - * model/} stay free of any DB-side imports. - * - *

Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side - * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this - * mapper joins / splits them at the boundary. - */ -@Component -public class ModelDbMapper { - - // --- TableOperationsRow <-> TableOperation --- - - public TableOperation toOperation(TableOperationsRow row) { - if (row == null) { - return null; - } - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .status(toModelOperationStatus(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - - public TableOperationsRow toRow(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsRow.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toDbOperationType(op.getOperationType())) - .status(toDbOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryRow <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { - if (row == null) { - return null; - } - return TableOperationsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .completedAt(row.getCompletedAt()) - .status(toModelHistoryStatus(row.getStatus())) - .build(); - } - - public TableOperationsHistoryRow toRow(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryRow.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - 
.databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .operationType(toDbOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toDbHistoryStatus(history.getStatus())) - .build(); - } - - // --- TableStatsRow -> Table --- - - public Table toTable(TableStatsRow row) { - if (row == null) { - return null; - } - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - // table_stats holds only the snapshot — deltas live on the history table. - .stats(joinStats(row.getSnapshot(), null)) - .updatedAt(row.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistoryRow -> TableStatsHistory --- - - public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { - if (row == null) { - return null; - } - return TableStatsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) - .recordedAt(row.getRecordedAt()) - .build(); - } - - // --- TableStats payload <-> (snapshot, delta) --- - - /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ - public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { - if (dbSnapshot == null && dbDelta == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(dbSnapshot)) - .delta(toModelDelta(dbDelta)) - .build(); - } - - /** Project the internal-model {@link TableStats#getSnapshot()} side. */ - public SnapshotMetrics toDbSnapshot(TableStats modelStats) { - return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); - } - - /** Project the internal-model {@link TableStats#getDelta()} side. 
*/ - public CommitDeltaMetrics toDbDelta(TableStats modelStats) { - return modelStats == null ? null : toDbDelta(modelStats.getDelta()); - } - - public TableStatsHistoryRow toStatsHistoryRow( - String id, - String tableUuid, - String databaseName, - String tableName, - TableStats stats, - java.time.Instant recordedAt) { - return TableStatsHistoryRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .snapshot(toDbSnapshot(stats)) - .delta(toDbDelta(stats)) - .recordedAt(recordedAt) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.db.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( - OperationStatus v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); - } - - public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { - return v == null ? 
null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); - } - - // --- inner-payload field copies --- - - private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { - if (v == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { - if (v == null) { - return null; - } - return SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { - if (v == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } - - private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { - if (v == null) { - return null; - } - return CommitDeltaMetrics.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } -} From bb8aa4d2c4685caa141f830ffa165b8f0ab75a26 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:07:15 -0700 Subject: [PATCH 52/81] refactor(optimizer): service + controllers use type to/from methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The api↔model and model↔db boundaries no longer go through injected mapper beans. Switch every call site to the to/from methods that now live on the types themselves. OptimizerDataServiceImpl: - Drop the ModelDbMapper field. 
No DI at all (only repositories). - Row → model via TableOperation::fromRow, TableOperationsHistory::fromRow, Table::fromRow, TableStatsHistory::fromRow. - Model → row via instance methods: history.toRow(), table.toBuilder() ...build().toRow(), and TableStats stats.toSnapshotRow() / .toDeltaRow(). - Enum filters on list() use OperationType::toDb / OperationStatus::toDb method references. Controllers (TableOperationsController, TableOperationsHistoryController, TableStatsController): - Drop the ApiModelMapper field. - api → model on the way in: dto.toModel(), request.toModel(uuid), request.getStatus().toModel(), apiEnum.toModel(). - model → api on the way out: Dto.fromModel(modelObj). --- .../controller/TableOperationsController.java | 18 +++-- .../TableOperationsHistoryController.java | 6 +- .../api/controller/TableStatsController.java | 10 +-- .../openhouse/optimizer/model/Table.java | 2 +- .../model/TableOperationsHistory.java | 2 +- .../service/OptimizerDataServiceImpl.java | 81 ++++++++----------- 6 files changed, 51 insertions(+), 68 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 2c2483c1b..19e878910 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -5,7 +5,6 @@ import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ 
-28,7 +27,6 @@ public class TableOperationsController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** * Report that an operation has completed. The body carries the {@code operationId} the caller is @@ -41,8 +39,12 @@ public ResponseEntity completeOperation( @RequestBody CompleteOperationRequest request) { return service .completeOperation( - request.getOperationId(), apiMapper.toModelHistoryStatus(request.getStatus())) - .map(history -> ResponseEntity.status(HttpStatus.CREATED).body(apiMapper.toDto(history))) + request.getOperationId(), + request.getStatus() == null ? null : request.getStatus().toModel()) + .map( + history -> + ResponseEntity.status(HttpStatus.CREATED) + .body(TableOperationsHistoryDto.fromModel(history))) .orElse(ResponseEntity.notFound().build()); } @@ -51,7 +53,7 @@ public ResponseEntity completeOperation( public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) - .map(apiMapper::toDto) + .map(TableOperationsDto::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -70,13 +72,13 @@ public ResponseEntity> listTableOperations( List result = service .listTableOperations( - Optional.ofNullable(operationType).map(apiMapper::toModelOperationType), - Optional.ofNullable(status).map(apiMapper::toModelOperationStatus), + Optional.ofNullable(operationType).map(OperationType::toModel), + Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(apiMapper::toDto) + .map(TableOperationsDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 
df7cabeff..0c6f4834c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,7 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; @@ -23,14 +22,13 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistoryDto dto) { return ResponseEntity.status(HttpStatus.CREATED) - .body(apiMapper.toDto(service.appendHistory(apiMapper.toHistory(dto)))); + .body(TableOperationsHistoryDto.fromModel(service.appendHistory(dto.toModel()))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. 
*/ @@ -39,7 +37,7 @@ public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { List result = service.getHistory(tableUuid, limit).stream() - .map(apiMapper::toDto) + .map(TableOperationsHistoryDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 2b738a6c3..aa299b015 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -26,7 +25,6 @@ public class TableStatsController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every @@ -36,7 +34,7 @@ public class TableStatsController { public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( - apiMapper.toDto(service.upsertTableStats(apiMapper.toTable(tableUuid, request)))); + TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. 
*/ @@ -44,7 +42,7 @@ public ResponseEntity upsertTableStats( public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) - .map(apiMapper::toDto) + .map(TableStatsDto::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -65,7 +63,7 @@ public ResponseEntity> listTableStats( Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(apiMapper::toDto) + .map(TableStatsDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } @@ -81,7 +79,7 @@ public ResponseEntity> getStatsHistory( @RequestParam(defaultValue = "100") int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() - .map(apiMapper::toDto) + .map(TableStatsHistoryDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index 659dd18da..149128f44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -18,7 +18,7 @@ * types know nothing about model/ or api/. 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 42a48479a..8cbfb6ff7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -14,7 +14,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 47143118c..87f300192 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.HistoryStatus; @@ -9,8 +8,8 @@ import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; 
import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -28,8 +27,9 @@ /** * Implementation of {@link OptimizerDataService}. * - *

<p>Operates purely on model/ and db/ types. The model↔db boundary is the {@link ModelDbMapper}. - * No api/-package types appear in this class. + * <p>
Operates purely on model/ and db/ types. Conversion happens via the {@code toRow()} / {@code + * fromRow(...)} methods on the model types themselves — no injected mapper. No api/-package types + * appear in this class. */ @Service @RequiredArgsConstructor @@ -39,7 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final ModelDbMapper dbMapper; // --- TableOperations --- @@ -52,13 +51,13 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.map(dbMapper::toDbOperationType).orElse(null), - status.map(dbMapper::toDbOperationStatus).orElse(null), + operationType.map(OperationType::toDb).orElse(null), + status.map(OperationStatus::toDb).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(dbMapper::toOperation) + .map(TableOperation::fromRow) .collect(Collectors.toList()); } @@ -69,24 +68,22 @@ public Optional completeOperation( return operationsRepository .findById(operationId) .map( - row -> { - TableOperationsHistoryRow historyRow = - TableOperationsHistoryRow.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(row.getOperationType()) - .completedAt(Instant.now()) - .status(dbMapper.toDbHistoryStatus(status)) - .build(); - return dbMapper.toHistory(historyRepository.save(historyRow)); - }); + row -> + TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .completedAt(Instant.now()) + .status(status) + .build()) + .map(history -> 
TableOperationsHistory.fromRow(historyRepository.save(history.toRow()))); } @Override public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(dbMapper::toOperation); + return operationsRepository.findById(id).map(TableOperation::fromRow); } // --- TableStats --- @@ -96,6 +93,7 @@ public Optional getTableOperation(String id) { public Table upsertTableStats(Table table) { Instant now = Instant.now(); String tableUuid = table.getTableUuid(); + TableStats stats = table.getStats(); TableStatsRow row = statsRepository @@ -106,19 +104,11 @@ public Table upsertTableStats(Table table) { .toBuilder() .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .snapshot(stats == null ? null : stats.toSnapshotRow()) .tableProperties(table.getTableProperties()) .updatedAt(now) .build()) - .orElse( - TableStatsRow.builder() - .tableUuid(tableUuid) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(now) - .build()); + .orElse(table.toBuilder().updatedAt(now).build().toRow()); TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( @@ -127,17 +117,17 @@ public Table upsertTableStats(Table table) { .tableUuid(tableUuid) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) - .delta(dbMapper.toDbDelta(table.getStats())) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .delta(stats == null ? null : stats.toDeltaRow()) .recordedAt(now) .build()); - return dbMapper.toTable(saved); + return Table.fromRow(saved); } @Override public Optional

<Table> getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(dbMapper::toTable); + return statsRepository.findById(tableUuid).map(Table::fromRow); } @Override @@ -145,7 +135,7 @@ public List<Table>
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(dbMapper::toTable) + .map(Table::fromRow) .collect(Collectors.toList()); } @@ -154,7 +144,7 @@ public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(dbMapper::toStatsHistory) + .map(TableStatsHistory::fromRow) .collect(Collectors.toList()); } @@ -163,25 +153,20 @@ public List getStatsHistory( @Override @Transactional public TableOperationsHistory appendHistory(TableOperationsHistory history) { - TableOperationsHistoryRow row = - TableOperationsHistoryRow.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .operationType(dbMapper.toDbOperationType(history.getOperationType())) + TableOperationsHistory toWrite = + history + .toBuilder() .completedAt( history.getCompletedAt() != null ? 
history.getCompletedAt() : Instant.now()) - .status(dbMapper.toDbHistoryStatus(history.getStatus())) .build(); - return dbMapper.toHistory(historyRepository.save(row)); + return TableOperationsHistory.fromRow(historyRepository.save(toWrite.toRow())); } @Override public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(dbMapper::toHistory) + .map(TableOperationsHistory::fromRow) .collect(Collectors.toList()); } } From af23d5ef63ff1e44a483392e6a364c507d4cae34 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:02:51 -0700 Subject: [PATCH 53/81] fix(optimizer): make TableStats self-describing; route DTO conversion to TableStats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableStats now carries its own identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt) alongside the snapshot + delta payload. Consumers no longer need an outer wrapper to know which table the stats belong to. api.TableStatsDto.toModel() and api.UpsertTableStatsRequest.toModel() now return model.TableStats (was model.Table). The two types only happened to have the same shape — semantically a DTO for stats is stats, not a table. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. 
{@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..906d01669 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) From 3864e4257d0476333cbd1d78f87207dc1c46b16e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:04:24 -0700 Subject: [PATCH 54/81] chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1 Enriches model.TableStats with identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt), and reroutes the api DTOs' toModel/fromModel pair to model.TableStats. opt-1's existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. 
{@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 212390af9..eb11c9d25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? 
null : snapshot.toDb(); From a6045b5534b39b14299b4eb36a61c3f872ef3ab6 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:06:06 -0700 Subject: [PATCH 55/81] =?UTF-8?q?feat(optimizer):=20add=20TableStats?= =?UTF-8?q?=E2=86=94TableStatsRow=20conversion=20on=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TableStats.toRow() / fromRow() let the service operate purely on the self-describing model.TableStats type instead of going through Table. Existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved for the history path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableStats.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index eb11c9d25..847f5a00e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -47,6 +47,38 @@ public class TableStats { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + /** + * Project to the current-state {@code table_stats} row. Snapshot only; deltas live on history. + */ + public com.linkedin.openhouse.optimizer.db.TableStatsRow toRow() { + return com.linkedin.openhouse.optimizer.db.TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(snapshot == null ? null : snapshot.toDb()) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .updatedAt(updatedAt) + .build(); + } + + /** + * Build a {@link TableStats} from a current-state DB row. {@link #delta} is left {@code null}. 
+ */ + public static TableStats fromRow(com.linkedin.openhouse.optimizer.db.TableStatsRow row) { + if (row == null) { + return null; + } + return TableStats.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .snapshot(SnapshotMetrics.fromDb(row.getSnapshot())) + .updatedAt(row.getUpdatedAt()) + .build(); + } + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? null : snapshot.toDb(); From db5921e038d22f5e2191ee5766c3ca8aefac5bfd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:07:59 -0700 Subject: [PATCH 56/81] refactor(optimizer): service stats methods take/return TableStats, not Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OptimizerDataService.upsertTableStats / getTableStats / listTableStats now operate on model.TableStats. The service stays decoupled from Table — stats are stats, not tables. Conversion to TableStatsRow goes through TableStats.toRow / fromRow. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/OptimizerDataService.java | 12 ++--- .../service/OptimizerDataServiceImpl.java | 34 ++++++------ .../service/OptimizerDataServiceImplTest.java | 54 +++++++------------ 3 files changed, 40 insertions(+), 60 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index e8a4da86e..5d5edaee2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -3,9 +3,9 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import java.util.List; @@ -50,20 +50,20 @@ List listTableOperations( // --- TableStats --- /** - * Create or update the stats row for {@code table.getTableUuid()}. Fully idempotent: the same + * Create or update the stats row for {@code stats.getTableUuid()}. Fully idempotent: the same * call overwrites the previous snapshot with the latest commit values. The service stamps {@link - * Table#getUpdatedAt()} server-side and returns the resulting {@link Table}. + * TableStats#getUpdatedAt()} server-side and returns the resulting {@link TableStats}. */ - Table upsertTableStats(Table table); + TableStats upsertTableStats(TableStats stats); /** Return the stats row for {@code tableUuid}, or empty if none exists. 
*/ - Optional

<Table> getTableStats(String tableUuid); + Optional<TableStats> getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List<Table>
listTableStats( + List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 87f300192..633411e98 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -5,7 +5,6 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; @@ -90,10 +89,9 @@ public Optional getTableOperation(String id) { @Override @Transactional - public Table upsertTableStats(Table table) { + public TableStats upsertTableStats(TableStats stats) { Instant now = Instant.now(); - String tableUuid = table.getTableUuid(); - TableStats stats = table.getStats(); + String tableUuid = stats.getTableUuid(); TableStatsRow row = statsRepository @@ -102,40 +100,40 @@ public Table upsertTableStats(Table table) { existing -> existing .toBuilder() - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(stats == null ? 
null : stats.toSnapshotRow()) - .tableProperties(table.getTableProperties()) + .databaseName(stats.getDatabaseName()) + .tableName(stats.getTableName()) + .snapshot(stats.toSnapshotRow()) + .tableProperties(stats.getTableProperties()) .updatedAt(now) .build()) - .orElse(table.toBuilder().updatedAt(now).build().toRow()); + .orElse(stats.toBuilder().updatedAt(now).build().toRow()); TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( TableStatsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(stats == null ? null : stats.toSnapshotRow()) - .delta(stats == null ? null : stats.toDeltaRow()) + .databaseName(stats.getDatabaseName()) + .tableName(stats.getTableName()) + .snapshot(stats.toSnapshotRow()) + .delta(stats.toDeltaRow()) .recordedAt(now) .build()); - return Table.fromRow(saved); + return TableStats.fromRow(saved); } @Override - public Optional
<Table> getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(Table::fromRow); + public Optional<TableStats> getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(TableStats::fromRow); } @Override - public List<Table>
listTableStats( + public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(Table::fromRow) + .map(TableStats::fromRow) .collect(Collectors.toList()); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 9d653e21d..b329459ad 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -7,7 +7,6 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; @@ -78,23 +77,20 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - Table input = - Table.builder() + TableStats input = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") + .tableName("tbl1") .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) - .build()) + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .build(); - Table result = service.upsertTableStats(input); + TableStats result = service.upsertTableStats(input); 
assertThat(result.getTableUuid()).isEqualTo(tableUuid); assertThat(result.getDatabaseName()).isEqualTo("db1"); - assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(result.getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); assertThat(result.getUpdatedAt()).isNotNull(); @@ -104,41 +100,27 @@ void upsertTableStats_createsNewRow() { @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - Table first = - Table.builder() + TableStats first = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(5L) - .numFilesDeleted(1L) - .build()) - .build()) + .tableName("tbl1") + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - Table second = - Table.builder() + TableStats second = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(3L) - .numFilesDeleted(0L) - .build()) - .build()) + .tableName("tbl1") + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); service.upsertTableStats(first); - Table result = service.upsertTableStats(second); + TableStats result = service.upsertTableStats(second); - assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + 
assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); List history = From 3aebf64b743fb88b2d92a7d623ed70b5dbdee981 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:32:00 -0700 Subject: [PATCH 57/81] chore(optimizer): enable toBuilder on model.Table and model.TableOperationsHistory Moved down from opt-2. The service-layer code (opt-2) uses .toBuilder() on both types; the lombok annotation that enables it belongs on the PR that owns model/. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..089a52982 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -17,7 +17,7 @@ * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..c8950ee26 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -13,7 +13,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From bf30f86e18a8f53f185b2c00fb4b0880847a976d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:33:19 -0700 Subject: [PATCH 58/81] chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index 659dd18da..149128f44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -18,7 +18,7 @@ * types know nothing about model/ or api/. */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 42a48479a..8cbfb6ff7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -14,7 +14,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From b6c7f42774a61214cdabe6d01384b89c685cda35 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:32:28 -0700 Subject: [PATCH 59/81] refactor(optimizer): drop fileCount enrichment from model.TableOperation TableOperation becomes a pure operation record. Consumers (scheduler) look up TableStats at the point they need it, rather than carrying enrichment data on the model type. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableOperation.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..fe91c38d0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -15,10 +15,6 @@ *

 <p>Pure internal-model type — no references to wire-API or DB types. Cross-layer construction * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). - * - *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., - * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not - * carry it. */ @Data @Builder @@ -50,12 +46,6 @@ public class TableOperation { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; - /** - * Number of current data files on the table at evaluation time. Non-persisted enrichment; - * populated by consumers that need it. Null when not enriched. - */ - private Long fileCount; - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() From 2b06c92e0cb3f5eaf0ab8f205dcb141eb9c47650 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 14:44:35 -0700 Subject: [PATCH 60/81] feat(repo): add findClaimedIds for transactional batch-claim verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit markSchedulingBatch returns only a count of rows transitioned; callers that need to know *which* rows they own must re-query. findClaimedIds takes the same id list + scheduledAt watermark passed to the UPDATE and returns the subset whose SCHEDULING transition matches that watermark — i.e. the rows this caller actually claimed in this call. Used by the scheduler to subset its bin to actually-claimed operations before submitting the Spark job; without this the scheduler can launch a job for ids another instance already owns and then incorrectly mark all of them SCHEDULED. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationsRepository.java | 19 +++++- .../TableOperationsRepositoryTest.java | 58 +++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 8baddfe42..513006bf6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -33,8 +33,9 @@ List find( /** * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; callers must - * re-query if they need the precise list. + * rows transitioned. Rows already claimed by another instance are skipped silently; pair this + * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get + * the precise list of rows this caller now owns. */ @Modifying @Query( @@ -46,6 +47,20 @@ List find( int markSchedulingBatch( @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** + * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code + * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine + * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer + * exist). 
+ */ + @Query( + "SELECT r.id FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " + + "AND r.scheduledAt = :scheduledAt") + List findClaimedIds( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in * SCHEDULING. Returns the number of rows transitioned. diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 44a03ba9e..bfe3fc437 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -105,6 +105,64 @@ void find_byStatus() { assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } + @Test + void findClaimedIds_returnsOnlyClaimedSubset() { + String idA = UUID.randomUUID().toString(); + String idB = UUID.randomUUID().toString(); + String idC = UUID.randomUUID().toString(); + repository.save(pending(idA)); + repository.save(pending(idB)); + // idC is already SCHEDULING with a different scheduledAt — must NOT appear. 
+ repository.save( + TableOperationsRow.builder() + .id(idC) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_c") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULING) + .createdAt(Instant.now()) + .scheduledAt(Instant.now().minusSeconds(60)) + .build()); + + Instant now = Instant.now(); + repository.markSchedulingBatch(List.of(idA, idB, idC), now); + + List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); + assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + } + + @Test + void findClaimedIds_emptyWhenNothingClaimed() { + String id = UUID.randomUUID().toString(); + repository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_x") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .build()); + + List claimed = repository.findClaimedIds(List.of(id), Instant.now()); + assertThat(claimed).isEmpty(); + } + + private TableOperationsRow pending(String id) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_" + id) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + @Test void find_byDatabaseAndTable() { repository.save( From 437a0ed84a2fa7a53ea827b241404f60d20ac230 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:35:27 -0700 Subject: [PATCH 61/81] refactor(optimizer): add Dto suffix to all api/model classes (PR #527 review) Per @abhisheknath2011 review comment 3262776356: > "We could change all the internal model add Dto suffix something like > TableOperationsDto. This aligns with the existing services codebase." 
Renames (suffix added): - CompleteOperationRequest -> CompleteOperationRequestDto - UpsertTableStatsRequest -> UpsertTableStatsRequestDto - OperationType (enum) -> OperationTypeDto - OperationStatus (enum) -> OperationStatusDto - HistoryStatus (enum) -> HistoryStatusDto - TableStats (inner payload) -> TableStatsPayloadDto - TableStats.SnapshotMetrics -> TableStatsPayloadDto.SnapshotMetricsDto - TableStats.CommitDelta -> TableStatsPayloadDto.CommitDeltaDto Cross-reference updates inside api/model. Internal model layer (services/optimizer/.../model/) is intentionally unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- ....java => CompleteOperationRequestDto.java} | 6 ++--- ...storyStatus.java => HistoryStatusDto.java} | 6 ++--- ...ionStatus.java => OperationStatusDto.java} | 6 ++--- ...erationType.java => OperationTypeDto.java} | 6 ++--- .../api/model/TableOperationsDto.java | 8 +++--- .../api/model/TableOperationsHistoryDto.java | 8 +++--- .../optimizer/api/model/TableStatsDto.java | 8 +++--- .../api/model/TableStatsHistoryDto.java | 4 +-- ...leStats.java => TableStatsPayloadDto.java} | 27 ++++++++++--------- ...t.java => UpsertTableStatsRequestDto.java} | 4 +-- 10 files changed, 42 insertions(+), 41 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{CompleteOperationRequest.java => CompleteOperationRequestDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{HistoryStatus.java => HistoryStatusDto.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationStatus.java => OperationStatusDto.java} (87%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationType.java => OperationTypeDto.java} (72%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{TableStats.java => TableStatsPayloadDto.java} (86%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{UpsertTableStatsRequest.java => UpsertTableStatsRequestDto.java} (95%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java index 0add634b5..0db7a8a37 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class CompleteOperationRequestDto { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequest { private String tableName; /** Debug echo: operation type. 
*/ - private OperationType operationType; + private OperationTypeDto operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java index 0c9ff95da..5a4421332 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -15,7 +15,7 @@ public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); + public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java similarity index 87% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java index 300c28263..89fa9f1b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ PENDING, @@ -25,8 +25,8 @@ public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatus fromModel( + public static OperationStatusDto fromModel( com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); + return v == null ? 
null : OperationStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java similarity index 72% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java index 5f325e712..210010eb0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; @@ -11,7 +11,7 @@ public com.linkedin.openhouse.optimizer.model.OperationType toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); + public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? 
null : OperationTypeDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index db8ef1039..880fe7926 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationType operationType; + private OperationTypeDto operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatus status; + private OperationStatusDto status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -65,8 +65,8 @@ public static TableOperationsDto fromModel(TableOperation op) { .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationType.fromModel(op.getOperationType())) - .status(OperationStatus.fromModel(op.getStatus())) + .operationType(OperationTypeDto.fromModel(op.getOperationType())) + .status(OperationStatusDto.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 935435040..652a58b3f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,13 +27,13 @@ public 
class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Convert to the internal-model counterpart. */ public TableOperationsHistory toModel() { @@ -58,9 +58,9 @@ public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationType.fromModel(h.getOperationType())) + .operationType(OperationTypeDto.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatus.fromModel(h.getStatus())) + .status(HistoryStatusDto.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 244050b04..6852081ab 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; @@ -57,9 +57,9 @@ public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.Tab .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + TableStatsPayloadDto.builder() + .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index b5f971bbf..bac3782ff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -27,7 +27,7 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** When this history row was recorded. 
*/ private Instant recordedAt; @@ -54,7 +54,7 @@ public static TableStatsHistoryDto fromModel(TableStatsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStats.fromModel(h.getStats())) + .stats(TableStatsPayloadDto.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java similarity index 86% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java index c75d21d75..692cb7247 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java @@ -17,13 +17,13 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsPayloadDto { /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; + private SnapshotMetricsDto snapshot; /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; + private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { @@ -34,13 +34,14 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build the api-layer payload from the internal-model counterpart. 
*/ - public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayloadDto fromModel( + com.linkedin.openhouse.optimizer.model.TableStats m) { if (m == null) { return null; } - return TableStats.builder() - .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(CommitDelta.fromModel(m.getDelta())) + return TableStatsPayloadDto.builder() + .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); } @@ -50,7 +51,7 @@ public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableS @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { + public static class SnapshotMetricsDto { /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; @@ -75,12 +76,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel } /** Build the api-layer inner object from the internal-model counterpart. */ - public static SnapshotMetrics fromModel( + public static SnapshotMetricsDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { if (m == null) { return null; } - return SnapshotMetrics.builder() + return SnapshotMetricsDto.builder() .tableVersion(m.getTableVersion()) .tableLocation(m.getTableLocation()) .tableSizeBytes(m.getTableSizeBytes()) @@ -95,7 +96,7 @@ public static SnapshotMetrics fromModel( @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { + public static class CommitDeltaDto { /** Number of data files this commit added to the table. */ private Long numFilesAdded; @@ -120,12 +121,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { } /** Build the api-layer inner object from the internal-model counterpart. 
*/ - public static CommitDelta fromModel( + public static CommitDeltaDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { if (m == null) { return null; } - return CommitDelta.builder() + return CommitDeltaDto.builder() .numFilesAdded(m.getNumFilesAdded()) .numFilesDeleted(m.getNumFilesDeleted()) .addedSizeBytes(m.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java index 08b42050f..75753fa69 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequest { +public class UpsertTableStatsRequestDto { /** Denormalized database name for display. */ private String databaseName; @@ -26,7 +26,7 @@ public class UpsertTableStatsRequest { private String tableName; /** Combined snapshot + delta stats payload from this commit. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; From eedf6d09f619c5ca153dc0bc0490be26bfb25673 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:37:56 -0700 Subject: [PATCH 62/81] refactor(optimizer): update controllers for renamed api/model Dto types Follow-up to opt-0 Dto rename: controllers now import the renamed types. 
- TableOperationsController: CompleteOperationRequest -> CompleteOperationRequestDto, OperationType -> OperationTypeDto, OperationStatus -> OperationStatusDto. - TableStatsController: UpsertTableStatsRequest -> UpsertTableStatsRequestDto. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../controller/TableOperationsController.java | 16 ++++++++-------- .../api/controller/TableStatsController.java | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 19e878910..f963380da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequestDto; +import com.linkedin.openhouse.optimizer.api.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.api.model.OperationTypeDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; @@ -36,7 +36,7 @@ public class TableOperationsController { */ @PostMapping("/complete") public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequest request) { + @RequestBody CompleteOperationRequestDto request) { return service .completeOperation( request.getOperationId(), @@ -64,16 
+64,16 @@ public ResponseEntity getTableOperation(@PathVariable String */ @GetMapping public ResponseEntity> listTableOperations( - @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) OperationStatus status, + @RequestParam(required = false) OperationTypeDto operationType, + @RequestParam(required = false) OperationStatusDto status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { List result = service .listTableOperations( - Optional.ofNullable(operationType).map(OperationType::toModel), - Optional.ofNullable(status).map(OperationStatus::toModel), + Optional.ofNullable(operationType).map(OperationTypeDto::toModel), + Optional.ofNullable(status).map(OperationStatusDto::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index aa299b015..469170d0a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -2,7 +2,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequestDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -32,7 +32,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String 
tableUuid, @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequestDto request) { return ResponseEntity.ok( TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } From 4f98c228b6ea661291fb924ed870d41e82757159 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:56:57 -0700 Subject: [PATCH 63/81] refactor(optimizer): rename api.model package to api.spec (PR #527 review) Per @abhisheknath2011 review comment 3262769497: > "Can we change the client side API to api.spec instead of api.model? > This also aligns with existing services." Mechanical package rename. The 10 api wire types move from services/optimizer/.../api/model/ to services/optimizer/.../api/spec/. No type or signature changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/{model => spec}/CompleteOperationRequestDto.java | 2 +- .../optimizer/api/{model => spec}/HistoryStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationTypeDto.java | 2 +- .../optimizer/api/{model => spec}/TableOperationsDto.java | 2 +- .../api/{model => spec}/TableOperationsHistoryDto.java | 2 +- .../openhouse/optimizer/api/{model => spec}/TableStatsDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsHistoryDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsPayloadDto.java | 2 +- .../api/{model => spec}/UpsertTableStatsRequestDto.java | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/CompleteOperationRequestDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/HistoryStatusDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationStatusDto.java (95%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationTypeDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsHistoryDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsHistoryDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsPayloadDto.java (98%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/UpsertTableStatsRequestDto.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java index 0db7a8a37..9dca54a8e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java index 5a4421332..034be4cf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. */ public enum HistoryStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java index 89fa9f1b0..f02ee2815 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. 
*/ public enum OperationStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java index 210010eb0..4e057b232 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationTypeDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java index 880fe7926..496f59f42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java index 652a58b3f..8b508bf36 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java index 6852081ab..165ae47dc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.time.Instant; import java.util.Collections; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java similarity index 96% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java index bac3782ff..9e7c44c56 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java similarity index 98% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java index 692cb7247..761471f91 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java index 75753fa69..3e1fe4764 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.util.Collections; import java.util.Map; From 231efde0ec369c12699a23684d8a38fc3ec5566d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:58:15 -0700 Subject: [PATCH 64/81] refactor(optimizer): update controller imports for api.model -> api.spec rename Follow-up to opt-0 package rename: controllers now import from com.linkedin.openhouse.optimizer.api.spec instead of api.model. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableOperationsController.java | 10 +++++----- .../controller/TableOperationsHistoryController.java | 2 +- .../optimizer/api/controller/TableStatsController.java | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index f963380da..accf6d543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequestDto; -import com.linkedin.openhouse.optimizer.api.model.OperationStatusDto; -import com.linkedin.openhouse.optimizer.api.model.OperationTypeDto; 
-import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.OperationStatusDto; +import com.linkedin.openhouse.optimizer.api.spec.OperationTypeDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 0c6f4834c..124697f10 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 469170d0a..19dcbabb9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequestDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; From b31decf8a6cb93351ce5fd153b2740f1ea0329e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:51:19 -0700 Subject: [PATCH 65/81] refactor(optimizer): move Dto suffix from api/spec to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reversal of an earlier inconsistency surfaced by abhisheknath2011 in the PR #527 review thread on api/spec/HistoryStatusDto.java. The api wire types are the canonical contract; they should carry the canonical name. The internal-model types are transfer objects between layers and now carry the Dto suffix. 
api/spec/ — Dto stripped from class + filename (10 files): CompleteOperationRequestDto -> CompleteOperationRequest HistoryStatusDto -> HistoryStatus OperationStatusDto -> OperationStatus OperationTypeDto -> OperationType TableOperationsDto -> TableOperations TableOperationsHistoryDto -> TableOperationsHistory TableStatsDto -> TableStats TableStatsHistoryDto -> TableStatsHistory TableStatsPayloadDto -> TableStatsPayload UpsertTableStatsRequestDto -> UpsertTableStatsRequest model/ — Dto added to class + filename (8 files): HistoryStatus -> HistoryStatusDto OperationStatus -> OperationStatusDto OperationType -> OperationTypeDto Table -> TableDto TableOperation -> TableOperationDto TableOperationsHistory -> TableOperationsHistoryDto TableStats -> TableStatsDto TableStatsHistory -> TableStatsHistoryDto Both renames land on opt-0 because opt-0 owns api/spec/ and model/. Cascade up the stack in follow-up commits. Out of scope here: HistoryStatus enum value additions (CANCELED, QUEUED) also raised in the same review thread; separate semantic change. 
Co-Authored-By: Claude Opus 4.7 --- ...Dto.java => CompleteOperationRequest.java} | 6 ++--- ...storyStatusDto.java => HistoryStatus.java} | 10 ++++---- ...ionStatusDto.java => OperationStatus.java} | 12 +++++----- ...erationTypeDto.java => OperationType.java} | 10 ++++---- ...perationsDto.java => TableOperations.java} | 20 ++++++++-------- ...ryDto.java => TableOperationsHistory.java} | 20 ++++++++-------- .../{TableStatsDto.java => TableStats.java} | 22 +++++++++-------- ...HistoryDto.java => TableStatsHistory.java} | 16 ++++++------- ...PayloadDto.java => TableStatsPayload.java} | 24 +++++++++---------- ...tDto.java => UpsertTableStatsRequest.java} | 16 +++++++------ ...storyStatus.java => HistoryStatusDto.java} | 2 +- ...ionStatus.java => OperationStatusDto.java} | 2 +- ...erationType.java => OperationTypeDto.java} | 2 +- .../model/{Table.java => TableDto.java} | 6 ++--- ...eOperation.java => TableOperationDto.java} | 16 ++++++------- ...ry.java => TableOperationsHistoryDto.java} | 8 +++---- .../{TableStats.java => TableStatsDto.java} | 2 +- ...History.java => TableStatsHistoryDto.java} | 4 ++-- 18 files changed, 101 insertions(+), 97 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequestDto.java => CompleteOperationRequest.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{HistoryStatusDto.java => HistoryStatus.java} (52%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationStatusDto.java => OperationStatus.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationTypeDto.java => OperationType.java} (50%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsDto.java => TableOperations.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsHistoryDto.java => 
TableOperationsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsDto.java => TableStats.java} (70%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsHistoryDto.java => TableStatsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsPayloadDto.java => TableStatsPayload.java} (81%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{UpsertTableStatsRequestDto.java => UpsertTableStatsRequest.java} (71%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{HistoryStatus.java => HistoryStatusDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationStatus.java => OperationStatusDto.java} (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationType.java => OperationTypeDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{Table.java => TableDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperation.java => TableOperationDto.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperationsHistory.java => TableOperationsHistoryDto.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStats.java => TableStatsDto.java} (99%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStatsHistory.java => TableStatsHistoryDto.java} (94%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java similarity index 92% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java index 9dca54a8e..15112882d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequestDto { +public class CompleteOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequestDto { private String tableName; /** Debug echo: operation type. */ - private OperationTypeDto operationType; + private OperationType operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java similarity index 52% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java index 034be4cf2..1d799818f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. 
*/ -public enum HistoryStatusDto { +public enum HistoryStatus { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -10,12 +10,12 @@ public enum HistoryStatusDto { FAILED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { - return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.HistoryStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatusDto.valueOf(v.name()); + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatusDto v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java index f02ee2815..b1cbe42b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatusDto { +public enum OperationStatus { /** Recommended by the Analyzer but not yet claimed by the Scheduler. 
*/ PENDING, @@ -20,13 +20,13 @@ public enum OperationStatusDto { CANCELED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { - return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatusDto fromModel( - com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatusDto.valueOf(v.name()); + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatusDto v) { + return v == null ? null : OperationStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java similarity index 50% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java index 4e057b232..ea6d2797c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java @@ -1,17 +1,17 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationTypeDto { +public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.OperationType toModel() { - return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationTypeDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationTypeDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationTypeDto.valueOf(v.name()); + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationTypeDto v) { + return v == null ? null : OperationType.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 496f59f42..60f2c3dd8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsDto { +public class TableOperations { /** Client-generated UUID identifying this specific operation recommendation. 
*/ private String id; @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationTypeDto operationType; + private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatusDto status; + private OperationStatus status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -42,8 +42,8 @@ public class TableOperationsDto { private String jobId; /** Convert to the internal-model counterpart. */ - public TableOperation toModel() { - return TableOperation.builder() + public TableOperationDto toModel() { + return TableOperationDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -56,17 +56,17 @@ public TableOperation toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableOperationsDto fromModel(TableOperation op) { + public static TableOperations fromModel(TableOperationDto op) { if (op == null) { return null; } - return TableOperationsDto.builder() + return TableOperations.builder() .id(op.getId()) .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationTypeDto.fromModel(op.getOperationType())) - .status(OperationStatusDto.fromModel(op.getStatus())) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java index 8b508bf36..7a000f840 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistoryDto { +public class TableOperationsHistory { /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ private String id; @@ -27,17 +27,17 @@ public class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationTypeDto operationType; + private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Convert to the internal-model counterpart. */ - public TableOperationsHistory toModel() { - return TableOperationsHistory.builder() + public TableOperationsHistoryDto toModel() { + return TableOperationsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -49,18 +49,18 @@ public TableOperationsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + public static TableOperationsHistory fromModel(TableOperationsHistoryDto h) { if (h == null) { return null; } - return TableOperationsHistoryDto.builder() + return TableOperationsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationTypeDto.fromModel(h.getOperationType())) + .operationType(OperationType.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatusDto.fromModel(h.getStatus())) + .status(HistoryStatus.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java index 165ae47dc..41f44f763 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java @@ -13,7 +13,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsDto { +public class TableStats { /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; @@ -34,9 +34,11 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) @@ -48,18 +50,18 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsDto.builder() + return TableStats.builder() .tableUuid(m.getTableUuid()) .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStatsPayloadDto.builder() - .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) - .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) + TableStatsPayload.builder() + .snapshot(TableStatsPayload.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayload.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java index 9e7c44c56..5508aca27 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistoryDto { +public class TableStatsHistory { /** UUID primary key set by the caller. */ private String id; @@ -27,14 +27,14 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** When this history row was recorded. */ private Instant recordedAt; /** Convert to the internal-model counterpart. */ - public TableStatsHistory toModel() { - return TableStatsHistory.builder() + public TableStatsHistoryDto toModel() { + return TableStatsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -45,16 +45,16 @@ public TableStatsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + public static TableStatsHistory fromModel(TableStatsHistoryDto h) { if (h == null) { return null; } - return TableStatsHistoryDto.builder() + return TableStatsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStatsPayloadDto.fromModel(h.getStats())) + .stats(TableStatsPayload.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java similarity index 81% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java index 761471f91..c347bf385 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java @@ -17,7 +17,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStatsPayloadDto { +public class TableStatsPayload { /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetricsDto snapshot; @@ -26,20 +26,20 @@ public class TableStatsPayloadDto { private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.builder() .snapshot(snapshot == null ? null : snapshot.toModel()) .delta(delta == null ? 
null : delta.toModel()) .build(); } /** Build the api-layer payload from the internal-model counterpart. */ - public static TableStatsPayloadDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayload fromModel( + com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsPayloadDto.builder() + return TableStatsPayload.builder() .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); @@ -66,8 +66,8 @@ public static class SnapshotMetricsDto { private Long numCurrentFiles; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics.builder() .tableVersion(tableVersion) .tableLocation(tableLocation) .tableSizeBytes(tableSizeBytes) @@ -77,7 +77,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel /** Build the api-layer inner object from the internal-model counterpart. */ public static SnapshotMetricsDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics m) { if (m == null) { return null; } @@ -111,8 +111,8 @@ public static class CommitDeltaDto { private Long deletedSizeBytes; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta.builder() .numFilesAdded(numFilesAdded) .numFilesDeleted(numFilesDeleted) .addedSizeBytes(addedSizeBytes) @@ -122,7 +122,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { /** Build the api-layer inner object from the internal-model counterpart. */ public static CommitDeltaDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta m) { if (m == null) { return null; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java similarity index 71% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 3e1fe4764..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequestDto { +public class UpsertTableStatsRequest { /** Denormalized database name for display. */ private String databaseName; @@ -26,19 +26,21 @@ public class UpsertTableStatsRequestDto { private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; /** - * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by - * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStatsDto} described + * by this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is * left {@code null}; the service stamps it server-side at write time. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java index 97b8e2992..463c62605 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The operation completed successfully. */ SUCCESS, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java index f284fedaf..b766f7dbe 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java index 8f4fe35a8..39b299806 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java @@ -5,7 +5,7 @@ * separate from the wire-API and DB representations so the internal model can evolve its set of * supported operations without churning either boundary. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java index 089a52982..408bc4fc7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java @@ -10,7 +10,7 @@ /** * An OpenHouse table enriched with stats and properties, built by combining data sources. 
Consumed - * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * by the analyzer (decides whether to produce a {@link TableOperationDto}) and the scheduler (reads * stats for bin-packing). * *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link @@ -20,7 +20,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class Table { +public class TableDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; @@ -35,7 +35,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ - private TableStats stats; + private TableStatsDto stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index fe91c38d0..8809a1b62 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -20,7 +20,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperation { +public class TableOperationDto { /** Unique operation ID (UUID). */ private String id; @@ -35,10 +35,10 @@ public class TableOperation { private String tableName; /** Operation type. */ - private OperationType operationType; + private OperationTypeDto operationType; /** Current lifecycle status. */ - private OperationStatus status; + private OperationStatusDto status; /** When this operation record was created. 
*/ private Instant createdAt; @@ -47,21 +47,21 @@ public class TableOperation { private Instant scheduledAt; /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() + public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { + return TableOperationDto.builder() .id(UUID.randomUUID().toString()) .tableUuid(table.getTableUuid()) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) .operationType(operationType) - .status(OperationStatus.PENDING) + .status(OperationStatusDto.PENDING) .createdAt(Instant.now()) .build(); } /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator byCreatedAt = + public static TableOperationDto mostRecent(TableOperationDto a, TableOperationDto b) { + Comparator byCreatedAt = Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java index c8950ee26..e05bb641e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java @@ -16,7 +16,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistory { +public class TableOperationsHistoryDto { /** Same UUID as the originating live-operations row. */ private String id; @@ -31,11 +31,11 @@ public class TableOperationsHistory { private String tableName; /** Operation type for this completed run. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; - /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ - private HistoryStatus status; + /** Terminal outcome: {@link HistoryStatusDto#SUCCESS} or {@link HistoryStatusDto#FAILED}. 
*/ + private HistoryStatusDto status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java similarity index 99% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java index 906d01669..d142dcc8b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java @@ -24,7 +24,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java similarity index 94% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java index 53bb54d1e..5579c95ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java @@ -18,7 +18,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistory { +public class TableStatsHistoryDto { /** UUID primary key — set by the caller, not generated server-side. */ private String id; @@ -33,7 +33,7 @@ public class TableStatsHistory { private String tableName; /** Snapshot + delta for this commit event. 
*/ - private TableStats stats; + private TableStatsDto stats; /** When this history row was recorded. */ private Instant recordedAt; From 91e89efa44712cd0483cfd8ca0748e60e174b1f9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:57:05 -0700 Subject: [PATCH 66/81] refactor(optimizer): update controller + service refs after Dto suffix swap Follow-up to the api/spec strip + model add-Dto rename on opt-0 (commit b31decf8). On opt-2 the merged content compiles after updating the consumer-side refs: api controllers (3 files): strip Dto from api type names (now canonical: TableOperations, CompleteOperationRequest, etc.) service interface + impl + test: add Dto to model type names (now: TableOperationDto, TableStatsDto, HistoryStatusDto, etc.) Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 34 ++++----- .../TableOperationsHistoryController.java | 14 ++-- .../api/controller/TableStatsController.java | 28 ++++---- .../service/OptimizerDataService.java | 43 ++++++------ .../service/OptimizerDataServiceImpl.java | 70 +++++++++---------- .../service/OptimizerDataServiceImplTest.java | 54 +++++++------- 6 files changed, 123 insertions(+), 120 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index accf6d543..6f9d6a177 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequestDto; -import com.linkedin.openhouse.optimizer.api.spec.OperationStatusDto; -import 
com.linkedin.openhouse.optimizer.api.spec.OperationTypeDto; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.spec.OperationStatus; +import com.linkedin.openhouse.optimizer.api.spec.OperationType; +import com.linkedin.openhouse.optimizer.api.spec.TableOperations; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ -35,8 +35,8 @@ public class TableOperationsController { * row, or 404 if the operation does not exist. */ @PostMapping("/complete") - public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequestDto request) { + public ResponseEntity completeOperation( + @RequestBody CompleteOperationRequest request) { return service .completeOperation( request.getOperationId(), @@ -44,16 +44,16 @@ public ResponseEntity completeOperation( .map( history -> ResponseEntity.status(HttpStatus.CREATED) - .body(TableOperationsHistoryDto.fromModel(history))) + .body(TableOperationsHistory.fromModel(history))) .orElse(ResponseEntity.notFound().build()); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */ @GetMapping("/{id}") - public ResponseEntity getTableOperation(@PathVariable String id) { + public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) - .map(TableOperationsDto::fromModel) + .map(TableOperations::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -63,22 +63,22 @@ public ResponseEntity getTableOperation(@PathVariable String * every row. 
*/ @GetMapping - public ResponseEntity> listTableOperations( - @RequestParam(required = false) OperationTypeDto operationType, - @RequestParam(required = false) OperationStatusDto status, + public ResponseEntity> listTableOperations( + @RequestParam(required = false) OperationType operationType, + @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - List result = + List result = service .listTableOperations( - Optional.ofNullable(operationType).map(OperationTypeDto::toModel), - Optional.ofNullable(status).map(OperationStatusDto::toModel), + Optional.ofNullable(operationType).map(OperationType::toModel), + Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(TableOperationsDto::fromModel) + .map(TableOperations::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 124697f10..36c422623 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; @@ -25,19 +25,19 @@ public 
class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping - public ResponseEntity appendHistory( - @RequestBody TableOperationsHistoryDto dto) { + public ResponseEntity appendHistory( + @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) - .body(TableOperationsHistoryDto.fromModel(service.appendHistory(dto.toModel()))); + .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ @GetMapping("/{tableUuid}") - public ResponseEntity> getHistory( + public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { - List result = + List result = service.getHistory(tableUuid, limit).stream() - .map(TableOperationsHistoryDto::fromModel) + .map(TableOperationsHistory::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 19dcbabb9..7cb745250 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStats; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistory; +import 
com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -31,18 +31,18 @@ public class TableStatsController { * Iceberg commit. Idempotent. */ @PutMapping("/{tableUuid}") - public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequestDto request) { + public ResponseEntity upsertTableStats( + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( - TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); + TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ @GetMapping("/{tableUuid}") - public ResponseEntity getTableStats(@PathVariable String tableUuid) { + public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) - .map(TableStatsDto::fromModel) + .map(TableStats::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -52,18 +52,18 @@ public ResponseEntity getTableStats(@PathVariable String tableUui * every row. */ @GetMapping - public ResponseEntity> listTableStats( + public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - List result = + List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(TableStatsDto::fromModel) + .map(TableStats::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } @@ -73,13 +73,13 @@ public ResponseEntity> listTableStats( * {@code since} (inclusive) and cap at {@code limit} rows. 
*/ @GetMapping("/{tableUuid}/history") - public ResponseEntity> getStatsHistory( + public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - List result = + List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() - .map(TableStatsHistoryDto::fromModel) + .map(TableStatsHistory::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 5d5edaee2..c32a67bae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,12 +1,12 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import java.util.List; 
import java.util.Optional; @@ -26,9 +26,9 @@ public interface OptimizerDataService { * List operations matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableOperations( - Optional operationType, - Optional status, + List listTableOperations( + Optional operationType, + Optional status, Optional databaseName, Optional tableName, Optional tableUuid); @@ -39,31 +39,32 @@ List listTableOperations( * {@code status}, and saves it. Returns the history record, or empty if the operation does not * exist. */ - Optional completeOperation(String operationId, HistoryStatus status); + Optional completeOperation( + String operationId, HistoryStatusDto status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes). */ - Optional getTableOperation(String id); + Optional getTableOperation(String id); - // --- TableStats --- + // --- TableStatsDto --- /** * Create or update the stats row for {@code stats.getTableUuid()}. Fully idempotent: the same * call overwrites the previous snapshot with the latest commit values. The service stamps {@link - * TableStats#getUpdatedAt()} server-side and returns the resulting {@link TableStats}. + * TableStatsDto#getUpdatedAt()} server-side and returns the resulting {@link TableStatsDto}. */ - TableStats upsertTableStats(TableStats stats); + TableStatsDto upsertTableStats(TableStatsDto stats); /** Return the stats row for {@code tableUuid}, or empty if none exists. */ - Optional getTableStats(String tableUuid); + Optional getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. 
*/ - List listTableStats( + List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** @@ -73,12 +74,12 @@ List listTableStats( * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Optional since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); - // --- TableOperationsHistory --- + // --- TableOperationsHistoryDto --- /** Append a completed-job result record. */ - TableOperationsHistory appendHistory(TableOperationsHistory history); + TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history); /** * Return the most recent history rows for a table UUID, newest first. @@ -86,5 +87,5 @@ List listTableStats( * @param tableUuid the stable table UUID * @param limit maximum number of rows to return */ - List getHistory(String tableUuid, int limit); + List getHistory(String tableUuid, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 633411e98..a9ead77ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,13 +2,13 @@ import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; 
-import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -42,54 +42,54 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { // --- TableOperations --- @Override - public List listTableOperations( - Optional operationType, - Optional status, + public List listTableOperations( + Optional operationType, + Optional status, Optional databaseName, Optional tableName, Optional tableUuid) { return operationsRepository .find( - operationType.map(OperationType::toDb).orElse(null), - status.map(OperationStatus::toDb).orElse(null), + operationType.map(OperationTypeDto::toDb).orElse(null), + status.map(OperationStatusDto::toDb).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(TableOperation::fromRow) + .map(TableOperationDto::fromRow) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation( - String operationId, HistoryStatus status) { + public Optional completeOperation( + String operationId, HistoryStatusDto status) { return operationsRepository .findById(operationId) .map( row -> - TableOperationsHistory.builder() + TableOperationsHistoryDto.builder() .id(row.getId()) .tableUuid(row.getTableUuid()) .databaseName(row.getDatabaseName()) 
.tableName(row.getTableName()) - .operationType(OperationType.fromDb(row.getOperationType())) + .operationType(OperationTypeDto.fromDb(row.getOperationType())) .completedAt(Instant.now()) .status(status) .build()) - .map(history -> TableOperationsHistory.fromRow(historyRepository.save(history.toRow()))); + .map(history -> TableOperationsHistoryDto.fromRow(historyRepository.save(history.toRow()))); } @Override - public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(TableOperation::fromRow); + public Optional getTableOperation(String id) { + return operationsRepository.findById(id).map(TableOperationDto::fromRow); } - // --- TableStats --- + // --- TableStatsDto --- @Override @Transactional - public TableStats upsertTableStats(TableStats stats) { + public TableStatsDto upsertTableStats(TableStatsDto stats) { Instant now = Instant.now(); String tableUuid = stats.getTableUuid(); @@ -120,51 +120,51 @@ public TableStats upsertTableStats(TableStats stats) { .recordedAt(now) .build()); - return TableStats.fromRow(saved); + return TableStatsDto.fromRow(saved); } @Override - public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(TableStats::fromRow); + public Optional getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(TableStatsDto::fromRow); } @Override - public List listTableStats( + public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(TableStats::fromRow) + .map(TableStatsDto::fromRow) .collect(Collectors.toList()); } @Override - public List getStatsHistory( + public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(TableStatsHistory::fromRow) + 
.map(TableStatsHistoryDto::fromRow) .collect(Collectors.toList()); } - // --- TableOperationsHistory --- + // --- TableOperationsHistoryDto --- @Override @Transactional - public TableOperationsHistory appendHistory(TableOperationsHistory history) { - TableOperationsHistory toWrite = + public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history) { + TableOperationsHistoryDto toWrite = history .toBuilder() .completedAt( history.getCompletedAt() != null ? history.getCompletedAt() : Instant.now()) .build(); - return TableOperationsHistory.fromRow(historyRepository.save(toWrite.toRow())); + return TableOperationsHistoryDto.fromRow(historyRepository.save(toWrite.toRow())); } @Override - public List getHistory(String tableUuid, int limit) { + public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(TableOperationsHistory::fromRow) + .map(TableOperationsHistoryDto::fromRow) .collect(Collectors.toList()); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index b329459ad..8457949cd 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -4,11 +4,11 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import 
com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -53,21 +53,21 @@ void completeOperation_writesHistoryFromOperationRow() { .jobId("spark-job-123") .build()); - Optional result = - service.completeOperation(operationId, HistoryStatus.SUCCESS); + Optional result = + service.completeOperation(operationId, HistoryStatusDto.SUCCESS); assertThat(result).isPresent(); - assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); + assertThat(result.get().getStatus()).isEqualTo(HistoryStatusDto.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); - assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); + assertThat(result.get().getOperationType()).isEqualTo(OperationTypeDto.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); assertThat(result.get().getCompletedAt()).isNotNull(); } @Test void completeOperation_notFound_returnsEmpty() { - Optional result = - service.completeOperation(UUID.randomUUID().toString(), HistoryStatus.FAILED); + Optional result = + service.completeOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); assertThat(result).isEmpty(); } @@ -77,16 +77,16 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - TableStats input = - TableStats.builder() + TableStatsDto input = + 
TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .build(); - TableStats result = service.upsertTableStats(input); + TableStatsDto result = service.upsertTableStats(input); assertThat(result.getTableUuid()).isEqualTo(tableUuid); assertThat(result.getDatabaseName()).isEqualTo("db1"); @@ -100,25 +100,27 @@ void upsertTableStats_createsNewRow() { @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - TableStats first = - TableStats.builder() + TableStatsDto first = + TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta( + TableStatsDto.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - TableStats second = - TableStats.builder() + TableStatsDto second = + TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta( + TableStatsDto.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); service.upsertTableStats(first); - TableStats result = service.upsertTableStats(second); + TableStatsDto result = service.upsertTableStats(second); assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(200L); 
assertThat(statsRepository.findAll()).hasSize(1); @@ -159,8 +161,8 @@ void listTableOperations_filtersByOperationTypeAndStatus() { assertThat( service.listTableOperations( - Optional.of(OperationType.ORPHAN_FILES_DELETION), - Optional.of(OperationStatus.PENDING), + Optional.of(OperationTypeDto.ORPHAN_FILES_DELETION), + Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), Optional.empty())) From 4e86569ce2e4327665b0d8885276c6b2e048612a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:16:10 -0700 Subject: [PATCH 67/81] feat(optimizer): propagate jobId through model + api conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableOperationDto grows a jobId field; api.TableOperations conversions copy it across the api ↔ model boundary. The api DTO already had the field; the model side was missing it. Relocated from opt-5 to its proper owner per the model-layer rule. Model ↔ db plumbing for the same field lands on opt-1 in a follow-up. Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/api/spec/TableOperations.java | 2 ++ .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 60f2c3dd8..0bca95734 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -52,6 +52,7 @@ public TableOperationDto toModel() { .status(status == null ? 
null : status.toModel()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -69,6 +70,7 @@ public static TableOperations fromModel(TableOperationDto op) { .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) + .jobId(op.getJobId()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index 8809a1b62..4cac14187 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -46,6 +46,9 @@ public class TableOperationDto { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; + /** Job ID returned by the Jobs Service after the scheduler submitted; null until SCHEDULED. */ + private String jobId; + /** Create a new PENDING operation for the given table and operation type. */ public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { return TableOperationDto.builder() From efcceeaa9d4656fe6ec2028c72a60fe7d92f59a3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:17:06 -0700 Subject: [PATCH 68/81] =?UTF-8?q?feat(optimizer):=20propagate=20jobId=20th?= =?UTF-8?q?rough=20model=20=E2=86=94=20db=20conversions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to the opt-0 jobId field addition: now that model.TableOperationDto carries jobId, wire it through toRow/fromRow so the db row's job_id column round-trips through the model layer. Relocated from opt-5. 
Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index c39a71ecd..18d57ce66 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -80,6 +80,7 @@ public TableOperationsRow toRow() { .status(status == null ? null : status.toDb()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -97,6 +98,7 @@ public static TableOperationDto fromRow(TableOperationsRow row) { .status(OperationStatusDto.fromDb(row.getStatus())) .createdAt(row.getCreatedAt()) .scheduledAt(row.getScheduledAt()) + .jobId(row.getJobId()) .build(); } } From c00f20188d0ccf76d17f8698fc6cf7aaf6ae3bda Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:17:42 -0700 Subject: [PATCH 69/81] =?UTF-8?q?chore(optimizer):=20rename=20OPTIMIZER=5F?= =?UTF-8?q?DB=5FUSERNAME=20=E2=86=92=20OPTIMIZER=5FDB=5FUSER?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analyzer and scheduler application.properties (introduced on opt-3 / opt-4) both already use OPTIMIZER_DB_USER. The service was the odd one out. Normalize so all three JVMs read the same env var name. Relocated from opt-5. 
Co-Authored-By: Claude Opus 4.7 --- services/optimizer/src/main/resources/application.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..e78745d00 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -12,7 +12,7 @@ spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.mode spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 From 1fe71f043260d2c5b57c6556cb69ea051f5fafbe Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:25:48 -0700 Subject: [PATCH 70/81] =?UTF-8?q?refactor(optimizer):=20rename=20CompleteO?= =?UTF-8?q?perationRequest=20=E2=86=92=20UpdateOperationRequest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename only. The HistoryStatus enum (SUCCESS/FAILED) and the once-terminal semantics are unchanged; the endpoint's behavior is the same. Future broadening (CANCELED/QUEUED, idempotency, mid-lifecycle status changes) is a separate concern. Method names + URL path will follow on opt-2; Spark-app caller + docs follow on opt-5. 
Co-Authored-By: Claude Opus 4.7 --- ...nRequest.java => UpdateOperationRequest.java} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequest.java => UpdateOperationRequest.java} (70%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index 15112882d..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -6,26 +6,26 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/complete}. + * Request body for {@code POST /v1/table-operations/update}. * - *

<p>Reports the outcome of a single completed operation. The service looks up the operation row by + *

<p>Reports the outcome of a single operation update. The service looks up the operation row by * {@link #operationId} and writes a history entry for it. * *

<p>A single Spark job typically processes N tables and yields N independent (status) outcomes — - * one per operation. Callers issue one complete request per operation; the service does not - * bulk-complete by job. + * one per operation. Callers issue one update request per operation; the service does not + * bulk-update by job. * *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link * #operationType}) are debug-only echo information. The server does not key off them; they are - * preserved on log lines and traces so an operator looking at a failing complete call can see which - * (db, table, operation) the caller believed it was completing without joining back to the - * operation row. + * preserved on log lines and traces so an operator looking at a failing update call can see which + * (db, table, operation) the caller believed it was updating without joining back to the operation + * row. */ @Data @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class UpdateOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; From 947bedfb69869fc8843c766e9dba24f04db58042 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:27:48 -0700 Subject: [PATCH 71/81] =?UTF-8?q?refactor(optimizer):=20rename=20completeO?= =?UTF-8?q?peration=20=E2=86=92=20updateOperation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename across the controller, service interface, service impl, and tests. URL path POST /v1/optimizer/operations/complete → /update. Symbol-rename only; behavior, semantics, and HistoryStatus values (SUCCESS/FAILED) are unchanged. Spark-app caller + docs follow on opt-5. 
Co-Authored-By: Claude Opus 4.7 --- .../api/controller/TableOperationsController.java | 14 +++++++------- .../optimizer/service/OptimizerDataService.java | 5 ++--- .../service/OptimizerDataServiceImpl.java | 2 +- .../service/OptimizerDataServiceImplTest.java | 6 +++--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 6f9d6a177..c28002bf7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.spec.OperationStatus; import com.linkedin.openhouse.optimizer.api.spec.OperationType; import com.linkedin.openhouse.optimizer.api.spec.TableOperations; import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ -29,16 +29,16 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report that an operation has completed. The body carries the {@code operationId} the caller is - * completing along with its terminal status. The backend looks up the operation row, writes a + * Report an update to an operation. The body carries the {@code operationId} the caller is + * updating along with its terminal status. 
The backend looks up the operation row, writes a * history entry with the operation's table metadata, and returns 201 Created with the history * row, or 404 if the operation does not exist. */ - @PostMapping("/complete") - public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequest request) { + @PostMapping("/update") + public ResponseEntity updateOperation( + @RequestBody UpdateOperationRequest request) { return service - .completeOperation( + .updateOperation( request.getOperationId(), request.getStatus() == null ? null : request.getStatus().toModel()) .map( diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index c32a67bae..0529d3608 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -34,13 +34,12 @@ List listTableOperations( Optional tableUuid); /** - * Complete an operation by writing a history entry. Looks up the operation row by {@code + * Update an operation by writing a history entry. Looks up the operation row by {@code * operationId}, copies its table metadata into a new history row with the supplied terminal * {@code status}, and saves it. Returns the history record, or empty if the operation does not * exist. */ - Optional completeOperation( - String operationId, HistoryStatusDto status); + Optional updateOperation(String operationId, HistoryStatusDto status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. 
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index a9ead77ce..1ca9c7777 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -62,7 +62,7 @@ public List listTableOperations( @Override @Transactional - public Optional completeOperation( + public Optional updateOperation( String operationId, HistoryStatusDto status) { return operationsRepository .findById(operationId) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8457949cd..e817e3fd5 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -34,7 +34,7 @@ class OptimizerDataServiceImplTest { @Autowired TableStatsRepository statsRepository; @Autowired TableStatsHistoryRepository statsHistoryRepository; - // --- completeOperation --- + // --- updateOperation --- @Test void completeOperation_writesHistoryFromOperationRow() { @@ -54,7 +54,7 @@ void completeOperation_writesHistoryFromOperationRow() { .build()); Optional result = - service.completeOperation(operationId, HistoryStatusDto.SUCCESS); + service.updateOperation(operationId, HistoryStatusDto.SUCCESS); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatusDto.SUCCESS); @@ -67,7 +67,7 @@ void completeOperation_writesHistoryFromOperationRow() { @Test void completeOperation_notFound_returnsEmpty() { Optional result = - 
service.completeOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); + service.updateOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); assertThat(result).isEmpty(); } From d65b511d472ff27f08ad12e86d393cf877457c51 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:19:47 -0700 Subject: [PATCH 72/81] refactor(optimizer-repo): unify find/updateBatch with Optional params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Repo public API now: - find(...) with Optional filters + required Pageable, on all four repos - updateBatch(ids, fromStatus, toStatus, Optional scheduledAt, Optional jobId) — replaces markSchedulingBatch, markScheduledBatch, markPendingBatch - cancel(ids) — replaces cancelDuplicatePendingBatch; deletes by-id with a defensive PENDING-only gate - findLatest(opType, Pageable) — was findLatestPerTable - history.find(tableUuid, Pageable) — was findByTableUuidOrderByCompletedAtDesc Side-effect columns on updateBatch use COALESCE with Optional.empty() → leave-unchanged. scheduledAt is not cleared on SCHEDULING → PENDING revert; status is the source of truth and the watermark is overwritten on the next claim. @Modifying queries get flushAutomatically + clearAutomatically so the L1 cache reflects the change immediately (caught by the unit tests). Spring Data @Query can't share an "IS NULL OR IN :list" pattern (Hibernate expands the list inline and the IS NULL check turns ungrammatical). The find path uses two internal queries dispatched by the default method — one with the ids predicate, one without. Callers (service, analyzer, scheduler) update on opt-2..opt-4 in follow-up commits. 
Co-Authored-By: Claude Opus 4.7 --- .../TableOperationsHistoryRepository.java | 15 +- .../repository/TableOperationsRepository.java | 181 +++++++---- .../TableStatsHistoryRepository.java | 19 +- .../repository/TableStatsRepository.java | 37 ++- .../TableOperationsHistoryRepositoryTest.java | 8 +- .../TableOperationsRepositoryTest.java | 307 ++++++++++++------ .../TableStatsHistoryRepositoryTest.java | 14 +- .../repository/TableStatsRepositoryTest.java | 17 +- 8 files changed, 396 insertions(+), 202 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 5faf349e3..6c08f844a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -13,11 +13,14 @@ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer - * {@code getHistory} endpoint. + * Return history rows for a single {@code tableUuid}, newest first. {@code pageable} is required; + * callers pick the row cap (default limit lives in {@code optimizer.repo.default-limit}). 
*/ - List findByTableUuidOrderByCompletedAtDesc( - String tableUuid, Pageable pageable); + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.completedAt DESC") + List find(@Param("tableUuid") String tableUuid, Pageable pageable); /** * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a @@ -37,6 +40,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable( - @Param("operationType") OperationType operationType); + List findLatest( + @Param("operationType") OperationType operationType, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 513006bf6..e0df2cd21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -5,6 +5,8 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; @@ -14,90 +16,131 @@ public interface TableOperationsRepository extends JpaRepository { /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Find operation rows matching the given filters. 
Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + Optional operationType, + Optional status, + Optional tableUuid, + Optional databaseName, + Optional tableName, + Optional scheduledAt, + Optional> ids, + Pageable pageable) { + // List parameters can't share an :ids IS NULL pattern with the IN clause — + // Hibernate expands the list inline and the IS NULL check turns ungrammatical. + // Two internal queries; dispatch by presence. + if (ids.isPresent()) { + return findInternalWithIds( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + ids.get(), + pageable); + } + return findInternal( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + pageable); + } + + /** + * Batch CAS: transition rows from {@code fromStatus} to {@code toStatus} for every id in {@code + * ids} that is still in {@code fromStatus}. Rows in a different status are skipped silently. + * Returns the number of rows transitioned. + * + *

<p>Side-effect columns use COALESCE — {@link Optional#empty()} means "leave unchanged". The + * underlying transitions are: + * + * <ul>
 + * <li>PENDING → SCHEDULING: pass {@code scheduledAt = Optional.of(claimedAt)}; the watermark + * lets {@link #find} resolve the precise set of rows this caller claimed.
 + * <li>SCHEDULING → SCHEDULED: pass {@code jobId = Optional.of(...)}.
 + * <li>SCHEDULING → PENDING: pass both empty; {@code scheduledAt} stays at the prior claim's + * watermark (overwritten on the next claim) and {@code jobId} stays null. + * </ul>
+ */ + default int updateBatch( + List ids, + OperationStatus fromStatus, + OperationStatus toStatus, + Optional scheduledAt, + Optional jobId) { + return updateBatchInternal( + ids, fromStatus, toStatus, scheduledAt.orElse(null), jobId.orElse(null)); + } + + /** + * Delete the specified rows, but only if they are still {@code PENDING}. The status gate is + * defensive — never drop a row another instance has claimed. Returns the number of rows actually + * removed. + */ + @Modifying(flushAutomatically = true, clearAutomatically = true) + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int cancel(@Param("ids") List ids); + + // ---- Internals. Use the Optional-typed default methods above. ---- + @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt)") + List findInternal( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName); - - /** - * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; pair this - * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get - * the precise list of rows this caller now owns. 
- */ - @Modifying - @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " r.scheduledAt = :scheduledAt " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") - int markSchedulingBatch( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); - - /** - * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code - * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine - * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer - * exist). - */ - @Query( - "SELECT r.id FROM TableOperationsRow r " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " - + "AND r.scheduledAt = :scheduledAt") - List findClaimedIds( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + Pageable pageable); - /** - * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in - * SCHEDULING. Returns the number of rows transitioned. 
- */ - @Modifying @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt) " + + "AND r.id IN :ids") + List findInternalWithIds( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + @Param("ids") List ids, + Pageable pageable); - /** - * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the - * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns - * the number of rows reverted. 
- */ - @Modifying + @Modifying(flushAutomatically = true, clearAutomatically = true) @Query( "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL " + + "SET r.status = :toStatus, " + + " r.scheduledAt = COALESCE(:scheduledAt, r.scheduledAt), " + + " r.jobId = COALESCE(:jobId, r.jobId) " + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markPendingBatch(@Param("ids") List ids); - - /** - * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in - * {@code keepIds}. Used by the scheduler to deduplicate before claiming. - */ - @Modifying - @Query( - "DELETE FROM TableOperationsRow r " - + "WHERE r.operationType = :operationType " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " - + "AND r.id NOT IN :keepIds") - int cancelDuplicatePendingBatch( - @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); + + "AND r.status = :fromStatus") + int updateBatchInternal( + @Param("ids") List ids, + @Param("fromStatus") OperationStatus fromStatus, + @Param("toStatus") OperationStatus toStatus, + @Param("scheduledAt") Instant scheduledAt, + @Param("jobId") String jobId); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 6f9595275..9b603f265 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; +import java.util.Optional; import 
org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -12,18 +13,22 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. - * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results + * Return history rows for a table, newest first. {@code since} is optional ({@link + * Optional#empty()} to skip the time filter). {@code pageable} is required; callers pick the row + * cap (default limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + String tableUuid, Optional since, Pageable pageable) { + return findInternal(tableUuid, since.orElse(null), pageable); + } + + // ---- Internals. Use the Optional-typed default method above. 
---- + @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List find( + List findInternal( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index dbf1de0ae..1123c0e7a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -2,6 +2,8 @@ import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -10,18 +12,18 @@ public interface TableStatsRepository extends JpaRepository { /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Return stats rows matching the given filters. Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). 
*/ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + default List find( + Optional databaseName, + Optional tableName, + Optional tableUuid, + Pageable pageable) { + return findInternal( + databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null), pageable); + } /** * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the @@ -30,4 +32,17 @@ List find( */ @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") List findDistinctDatabaseNames(); + + // ---- Internals. Use the Optional-typed default methods above. ---- + + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findInternal( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + Pageable pageable); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 706ecd877..9f1de0c0c 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -52,8 +52,7 @@ void findByTableUuid_returnsRowsNewestFirst() { .status(HistoryStatus.FAILED) .build()); - List rows = - 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); + List rows = repository.find(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); assertThat(rows.get(0).getId()).isEqualTo(idNewer); @@ -77,8 +76,7 @@ void findByTableUuid_respectsLimit() { .build()); } - List rows = - repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -121,7 +119,7 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); + repository.findLatest(OperationType.ORPHAN_FILES_DELETION, PageRequest.of(0, 10_000)); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index bfe3fc437..8f46af1bf 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -9,9 +9,12 @@ import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -20,24 +23,15 @@ @Transactional class TableOperationsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired 
TableOperationsRepository repository; @Test void saveAndFindById() { String id = UUID.randomUUID().toString(); - TableOperationsRow row = - TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); - - repository.save(row); + repository.save(pendingRow(id, "tbl1")); Optional found = repository.findById(id); assertThat(found).isPresent(); @@ -45,74 +39,103 @@ void saveAndFindById() { } @Test - void find_noParams_returnsAll() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + void find_noFilters_returnsAll() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); - List rows = repository.find(null, null, null, null, null); + List rows = + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(rows).hasSize(2); } @Test void find_byStatus() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - 
.createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULED), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findClaimedIds_returnsOnlyClaimedSubset() { + void find_byDatabaseAndTable() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1", "db1")); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl2", "db2")); + + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db1"), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db2"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + 
Optional.of("db1"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .isEmpty(); + } + + @Test + void find_byScheduledAtAndIds_resolvesClaimedSubset() { String idA = UUID.randomUUID().toString(); String idB = UUID.randomUUID().toString(); String idC = UUID.randomUUID().toString(); - repository.save(pending(idA)); - repository.save(pending(idB)); - // idC is already SCHEDULING with a different scheduledAt — must NOT appear. + repository.save(pendingRow(idA, "tbl_a")); + repository.save(pendingRow(idB, "tbl_b")); + // idC is already SCHEDULING with an older watermark — must NOT appear. repository.save( TableOperationsRow.builder() .id(idC) @@ -126,68 +149,160 @@ void findClaimedIds_returnsOnlyClaimedSubset() { .build()); Instant now = Instant.now(); - repository.markSchedulingBatch(List.of(idA, idB, idC), now); + int transitioned = + repository.updateBatch( + List.of(idA, idB, idC), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(now), + Optional.empty()); + assertThat(transitioned).isEqualTo(2); - List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); - assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + List claimedIds = + repository + .find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of(now), + Optional.of(List.of(idA, idB, idC)), + PAGE) + .stream() + .map(TableOperationsRow::getId) + .collect(Collectors.toList()); + assertThat(claimedIds).containsExactlyInAnyOrder(idA, idB); } @Test - void findClaimedIds_emptyWhenNothingClaimed() { + void updateBatch_schedulingToScheduled_setsJobIdAndPreservesScheduledAt() { String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") - .tableName("tbl_x") + .tableName("tbl1") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) - .scheduledAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - List claimed = repository.findClaimedIds(List.of(id), Instant.now()); - assertThat(claimed).isEmpty(); - } + int updated = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.SCHEDULED, + Optional.empty(), + Optional.of("job-123")); + assertThat(updated).isEqualTo(1); - private TableOperationsRow pending(String id) { - return TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl_" + id) - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(row.getJobId()).isEqualTo("job-123"); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); } @Test - void find_byDatabaseAndTable() { + void updateBatch_schedulingToPending_leavesScheduledAtUntouched() { + // scheduledAt is intentionally NOT cleared on revert. Status is the source of truth; the + // stale watermark gets overwritten on the next PENDING → SCHEDULING transition. 
+ String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) + .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); - assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); - assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); + int reverted = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.PENDING, + Optional.empty(), + Optional.empty()); + assertThat(reverted).isEqualTo(1); + + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); + } + + @Test + void updateBatch_skipsRowsNotInFromStatus() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_a")); + repository.save(scheduledRow(scheduledId, "tbl_b")); + + int transitioned = + repository.updateBatch( + List.of(pendingId, scheduledId), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(Instant.now()), + Optional.empty()); + assertThat(transitioned).isEqualTo(1); + + assertThat(repository.findById(pendingId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULING); + 
assertThat(repository.findById(scheduledId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void cancel_deletesOnlyPendingRows() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_p")); + repository.save(scheduledRow(scheduledId, "tbl_s")); + + int deleted = repository.cancel(List.of(pendingId, scheduledId)); + assertThat(deleted).isEqualTo(1); + + assertThat(repository.findById(pendingId)).isEmpty(); + assertThat(repository.findById(scheduledId)).isPresent(); + } + + // --- helpers --- + + private TableOperationsRow pendingRow(String id, String tableName) { + return pendingRow(id, tableName, "db1"); + } + + private TableOperationsRow pendingRow(String id, String tableName, String databaseName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName(databaseName) + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + private TableOperationsRow scheduledRow(String id, String tableName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("job-" + id) + .build(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 536b72e35..cddec50c9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -8,6 +8,7 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; +import java.util.Optional; import java.util.UUID; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -32,7 +33,8 @@ void saveAndFind() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -49,7 +51,8 @@ void find_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -64,7 +67,8 @@ void find_withSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.of(cutoff), PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -80,8 +84,8 @@ void find_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); + 
assertThat(repository.find(uuid2, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index f9cc28d57..e73ac0cb4 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -11,6 +11,8 @@ import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -19,6 +21,8 @@ @Transactional class TableStatsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired TableStatsRepository repository; @Test @@ -90,7 +94,8 @@ void find_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find(null, null, null)).hasSize(2); + assertThat(repository.find(Optional.empty(), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(2); } @Test @@ -112,7 +117,13 @@ void find_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); + assertThat(repository.find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(1); + assertThat( + repository + .find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE) + .get(0) + .getDatabaseName()) + .isEqualTo("db1"); } } From 49e43bc5aeb063e734b062c4223c2fd247fa1ef6 Mon Sep 17 00:00:00 
2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:22:27 -0700 Subject: [PATCH 73/81] refactor(optimizer-service): use Optional repo API + configurable limit OptimizerDataServiceImpl pipes Optional filters straight through to the repo (no .orElse(null) at the boundary). Adds the optimizer.repo.default-limit config property and threads it into the list-shaped calls. Service-impl test updates the one direct statsHistoryRepository.find(...) call to pass Optional.empty(). Co-Authored-By: Claude Opus 4.7 --- .../service/OptimizerDataServiceImpl.java | 27 +++++++++++-------- .../src/main/resources/application.properties | 2 ++ .../service/OptimizerDataServiceImplTest.java | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 1ca9c7777..4f820e1b8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,6 +19,7 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -39,6 +40,9 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; + @Value("${optimizer.repo.default-limit:10000}") + private int defaultLimit; + // --- TableOperations --- @Override @@ -50,11 +54,14 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - 
operationType.map(OperationTypeDto::toDb).orElse(null), - status.map(OperationStatusDto::toDb).orElse(null), - tableUuid.orElse(null), - databaseName.orElse(null), - tableName.orElse(null)) + operationType.map(OperationTypeDto::toDb), + status.map(OperationStatusDto::toDb), + tableUuid, + databaseName, + tableName, + Optional.empty(), + Optional.empty(), + PageRequest.of(0, defaultLimit)) .stream() .map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -131,8 +138,8 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { - return statsRepository - .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); } @@ -140,8 +147,7 @@ public List listTableStats( @Override public List getStatsHistory( String tableUuid, Optional since, int limit) { - return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) - .stream() + return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream() .map(TableStatsHistoryDto::fromRow) .collect(Collectors.toList()); } @@ -162,8 +168,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history @Override public List getHistory(String tableUuid, int limit) { - return historyRepository - .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() + return historyRepository.find(tableUuid, PageRequest.of(0, limit)).stream() .map(TableOperationsHistoryDto::fromRow) .collect(Collectors.toList()); } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index e78745d00..1b7eb1a40 100644 --- a/services/optimizer/src/main/resources/application.properties 
+++ b/services/optimizer/src/main/resources/application.properties @@ -16,5 +16,7 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 +optimizer.repo.default-limit=10000 + management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index e817e3fd5..8db14c4d6 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -126,7 +126,7 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { assertThat(statsRepository.findAll()).hasSize(1); List history = - statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); + statsHistoryRepository.find(tableUuid, Optional.empty(), PageRequest.of(0, 100)); assertThat(history).hasSize(2); assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); From b69e09a511e684e30dc9a5adb1b8e26951c7190e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 19:47:22 -0700 Subject: [PATCH 74/81] test(optimizer-repo): truncate Instant to micros for CI precision Instant.now() on Linux CI carries nanoseconds; MySQL TIMESTAMP(6) and H2 in MySQL mode store microseconds. The scheduledAt = :scheduledAt predicate in find(...) compared nano-resolution param against micro-resolution stored value and missed. Local (macOS, micro-only) hid the bug. Truncate to ChronoUnit.MICROS at write time in the one repo test that exercises the watermark round-trip. 
Co-Authored-By: Claude Opus 4.7 --- .../optimizer/repository/TableOperationsRepositoryTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 8f46af1bf..072be5fd9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Optional; import java.util.UUID; @@ -148,7 +149,10 @@ void find_byScheduledAtAndIds_resolvesClaimedSubset() { .scheduledAt(Instant.now().minusSeconds(60)) .build()); - Instant now = Instant.now(); + // Truncate to microseconds — MySQL TIMESTAMP(6) (and H2 in MySQL mode) stores microseconds, + // so a nano-precision now() round-trips lossily. On Linux CI Instant.now() carries nanos; + // truncating here keeps the watermark comparison exact across platforms. + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); int transitioned = repository.updateBatch( List.of(idA, idB, idC), From a89e037dd41b9096271425099701b5011effb804 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 21 May 2026 16:36:45 -0700 Subject: [PATCH 75/81] feat(optimizer): require limit on list-API endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All four list-style endpoints now require a caller-supplied limit. No server-side default, no max — getting the API contract right comes first; bounds can land separately. 
- TableOperationsController.listTableOperations: @RequestParam int limit - TableStatsController.listTableStats: @RequestParam int limit - TableStatsController.getStatsHistory: drop defaultValue="100" - TableOperationsHistoryController.getHistory: drop defaultValue="100" - OptimizerDataService: listTableOperations / listTableStats gain int limit - OptimizerDataServiceImpl: drop @Value("${optimizer.repo.default-limit}") and the defaultLimit field; thread caller-supplied limit straight to PageRequest.of(0, limit), which cascades to MySQL LIMIT n. - application.properties: remove now-unused optimizer.repo.default-limit. Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 10 ++++++---- .../TableOperationsHistoryController.java | 7 +++++-- .../api/controller/TableStatsController.java | 16 +++++++++------- .../optimizer/service/OptimizerDataService.java | 16 ++++++++++------ .../service/OptimizerDataServiceImpl.java | 16 ++++++++-------- .../src/main/resources/application.properties | 2 -- .../service/OptimizerDataServiceImplTest.java | 3 ++- 7 files changed, 40 insertions(+), 30 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index c28002bf7..5db7d31ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -59,8 +59,8 @@ public ResponseEntity getTableOperation(@PathVariable String id } /** - * List operations matching the given filters. All parameters are optional — omit all to return - * every row. + * List operations matching the given filters, capped at {@code limit} rows. 
Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableOperations( @@ -68,7 +68,8 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableOperations( @@ -76,7 +77,8 @@ public ResponseEntity> listTableOperations( Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableOperations::fromModel) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 36c422623..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -31,10 +31,13 @@ public ResponseEntity appendHistory( .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } - /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ + /** + * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code + * limit} is required. 
+ */ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( - @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { + @PathVariable String tableUuid, @RequestParam int limit) { List result = service.getHistory(tableUuid, limit).stream() .map(TableOperationsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 7cb745250..049516110 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -48,20 +48,22 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) } /** - * List stats rows matching the given filters. All parameters are optional — omit all to return - * every row. + * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableStats::fromModel) .collect(Collectors.toList()); @@ -69,14 +71,14 @@ public ResponseEntity> listTableStats( } /** - * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by - * {@code since} (inclusive) and cap at {@code limit} rows. 
+ * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} + * rows. Optional {@code since} filter (inclusive). {@code limit} is required. */ @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, - @RequestParam(defaultValue = "100") int limit) { + @RequestParam int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() .map(TableStatsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 0529d3608..c20ae7bf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -23,15 +23,16 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List operations matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableOperations( Optional operationType, Optional status, Optional databaseName, Optional tableName, - Optional tableUuid); + Optional tableUuid, + int limit); /** * Update an operation by writing a history entry. Looks up the operation row by {@code @@ -60,11 +61,14 @@ List listTableOperations( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. 
+ * List stats rows matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid); + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 4f820e1b8..29fd0eeee 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,7 +19,6 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -40,9 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - @Value("${optimizer.repo.default-limit:10000}") - private int defaultLimit; - // --- TableOperations --- @Override @@ -51,7 +47,8 @@ public List listTableOperations( Optional status, Optional databaseName, Optional tableName, - Optional tableUuid) { + Optional tableUuid, + int limit) { return operationsRepository .find( operationType.map(OperationTypeDto::toDb), @@ -61,7 +58,7 @@ public List listTableOperations( tableName, Optional.empty(), Optional.empty(), - PageRequest.of(0, defaultLimit)) + PageRequest.of(0, limit)) .stream() 
.map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -137,8 +134,11 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit) { + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, limit)) .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 1b7eb1a40..e78745d00 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -16,7 +16,5 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 -optimizer.repo.default-limit=10000 - management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8db14c4d6..2a3c1e676 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -165,7 +165,8 @@ void listTableOperations_filtersByOperationTypeAndStatus() { Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), - Optional.empty())) + Optional.empty(), + 100)) .extracting(op -> op.getId()) .containsExactly(pendingId); } From 
1e361afc3647c4b4570f828daeba73491ba0647c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 10:11:53 -0700 Subject: [PATCH 76/81] feat(optimizer): basic error-code handling across controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every optimizer endpoint now returns a uniform ApiError body ({code, message, path}) on non-2xx. Reshape updateOperation so the operationId lives in the URL (and is repeated in the body for self-describing payloads). - api/error/ApiError.java — DTO shape. - api/error/GlobalExceptionHandler.java — @RestControllerAdvice mapping framework exceptions to {VALIDATION_ERROR, INVALID_PARAMETER, MISSING_PARAMETER, MALFORMED_REQUEST, INTERNAL_ERROR}. Reason of ResponseStatusException is parsed as "CODE: message" for endpoint- specific 404s. - Controllers: orElseThrow(ResponseStatusException) replaces bare 404. TableOperationsController moves updateOperation to POST /v1/optimizer/operations/{id}/update; rejects with 400 PATH_BODY_MISMATCH when body.operationId != path.id. - UpdateOperationRequest: @NotBlank operationId, @NotNull status. - UpsertTableStatsRequest: @NotBlank databaseName, tableName. - spring-boot-starter-validation dep added. - New ControllerErrorHandlingTest: 13 MockMvc cases covering 404 / 400 validation / 400 type-mismatch / 400 missing-param / 400 malformed- body / 400 path-body-mismatch + happy-path sanity. 
Co-Authored-By: Claude Opus 4.7 --- services/optimizer/build.gradle | 1 + .../controller/TableOperationsController.java | 39 +++- .../TableOperationsHistoryController.java | 3 +- .../api/controller/TableStatsController.java | 11 +- .../optimizer/api/error/ApiError.java | 27 +++ .../api/error/GlobalExceptionHandler.java | 102 +++++++++ .../api/spec/UpdateOperationRequest.java | 15 +- .../api/spec/UpsertTableStatsRequest.java | 9 +- .../ControllerErrorHandlingTest.java | 216 ++++++++++++++++++ 9 files changed, 401 insertions(+), 22 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..c208cf330 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-validation:2.7.8' implementation 'mysql:mysql-connector-java:8.+' testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 5db7d31ed..25fd8ab6c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -19,6 +20,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for {@code table_operations}. */ @RestController @@ -29,23 +31,34 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report an update to an operation. The body carries the {@code operationId} the caller is - * updating along with its terminal status. The backend looks up the operation row, writes a - * history entry with the operation's table metadata, and returns 201 Created with the history - * row, or 404 if the operation does not exist. + * Report an update to an operation. {@code id} comes from the URL; the body's {@code operationId} + * must match (the controller rejects mismatched requests with 400). The backend looks up the + * operation row, writes a history entry with the operation's table metadata, and returns 201 + * Created with the history row, or 404 if the operation does not exist. 
*/ - @PostMapping("/update") + @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @RequestBody UpdateOperationRequest request) { + @PathVariable String id, @Valid @RequestBody UpdateOperationRequest request) { + if (!id.equals(request.getOperationId())) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + "PATH_BODY_MISMATCH: operationId in body ('" + + request.getOperationId() + + "') does not match path id ('" + + id + + "')"); + } return service - .updateOperation( - request.getOperationId(), - request.getStatus() == null ? null : request.getStatus().toModel()) + .updateOperation(id, request.getStatus().toModel()) .map( history -> ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(history))) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. 
*/ @@ -55,7 +68,11 @@ public ResponseEntity getTableOperation(@PathVariable String id .getTableOperation(id) .map(TableOperations::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 9a1b6d303..7a457d9cf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -4,6 +4,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -26,7 +27,7 @@ public class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). 
*/ @PostMapping public ResponseEntity appendHistory( - @RequestBody TableOperationsHistory dto) { + @Valid @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 049516110..976d05e7f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -8,7 +8,9 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; @@ -17,6 +19,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for managing per-table stats in the optimizer DB. 
*/ @RestController @@ -32,7 +35,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @Valid @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } @@ -44,7 +47,11 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .getTableStats(tableUuid) .map(TableStats::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "STATS_NOT_FOUND: no stats for tableUuid '" + tableUuid + "'")); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java new file mode 100644 index 000000000..9018e1bbe --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java @@ -0,0 +1,27 @@ +package com.linkedin.openhouse.optimizer.api.error; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Uniform error response body returned by every optimizer endpoint on a non-2xx status. + * + *

<p>Shape: + * + * <ul> + *   <li>{@code code} — machine-readable identifier (e.g. {@code OPERATION_NOT_FOUND}). + *   <li>{@code message} — human-readable explanation. + *   <li>{@code path} — the request URI that triggered the error. + * </ul>
+ */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ApiError { + private String code; + private String message; + private String path; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java new file mode 100644 index 000000000..00baf5bd9 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java @@ -0,0 +1,102 @@ +package com.linkedin.openhouse.optimizer.api.error; + +import javax.servlet.http.HttpServletRequest; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.http.converter.HttpMessageNotReadableException; +import org.springframework.web.bind.MethodArgumentNotValidException; +import org.springframework.web.bind.MissingServletRequestParameterException; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.method.annotation.MethodArgumentTypeMismatchException; +import org.springframework.web.server.ResponseStatusException; + +/** + * Maps framework + service exceptions to {@link ApiError} bodies with consistent HTTP status codes + * across every optimizer endpoint. + * + *

<p>Codes used: {@code VALIDATION_ERROR}, {@code INVALID_PARAMETER}, {@code MISSING_PARAMETER}, + * {@code MALFORMED_REQUEST}, {@code OPERATION_NOT_FOUND}, {@code STATS_NOT_FOUND}, {@code + * INTERNAL_ERROR}. Endpoint-specific 404 codes are passed through via {@link + * ResponseStatusException}'s {@code reason} field. + */ +@Slf4j +@RestControllerAdvice +public class GlobalExceptionHandler { + + @ExceptionHandler(MethodArgumentNotValidException.class) + public ResponseEntity handleValidation( + MethodArgumentNotValidException e, HttpServletRequest req) { + String message = + e.getBindingResult().getFieldErrors().stream() + .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) + .reduce((a, b) -> a + "; " + b) + .orElse(e.getMessage()); + return error(HttpStatus.BAD_REQUEST, "VALIDATION_ERROR", message, req); + } + + @ExceptionHandler(MethodArgumentTypeMismatchException.class) + public ResponseEntity handleTypeMismatch( + MethodArgumentTypeMismatchException e, HttpServletRequest req) { + String type = e.getRequiredType() == null ? "?" 
: e.getRequiredType().getSimpleName(); + return error( + HttpStatus.BAD_REQUEST, + "INVALID_PARAMETER", + "Parameter '" + + e.getName() + + "' has invalid value '" + + e.getValue() + + "' (expected " + + type + + ")", + req); + } + + @ExceptionHandler(MissingServletRequestParameterException.class) + public ResponseEntity handleMissingParam( + MissingServletRequestParameterException e, HttpServletRequest req) { + return error( + HttpStatus.BAD_REQUEST, + "MISSING_PARAMETER", + "Required parameter '" + e.getParameterName() + "' is missing", + req); + } + + @ExceptionHandler(HttpMessageNotReadableException.class) + public ResponseEntity handleMalformedBody( + HttpMessageNotReadableException e, HttpServletRequest req) { + return error( + HttpStatus.BAD_REQUEST, "MALFORMED_REQUEST", "Request body is missing or malformed", req); + } + + @ExceptionHandler(ResponseStatusException.class) + public ResponseEntity handleResponseStatus( + ResponseStatusException e, HttpServletRequest req) { + HttpStatus status = HttpStatus.resolve(e.getStatus().value()); + if (status == null) { + status = HttpStatus.INTERNAL_SERVER_ERROR; + } + String reason = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); + // Convention: when callers throw ResponseStatusException, they pack a "CODE: human message" + // into the reason. If no colon is present, the whole reason becomes the message and the code + // defaults to the status name (e.g. NOT_FOUND). + int sep = reason.indexOf(':'); + String code = sep > 0 ? reason.substring(0, sep).trim() : status.name(); + String message = sep > 0 ? 
reason.substring(sep + 1).trim() : reason; + return error(status, code, message, req); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { + log.warn("Unhandled exception on {}: {}", req.getRequestURI(), e.toString(), e); + return error( + HttpStatus.INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", "An unexpected error occurred", req); + } + + private static ResponseEntity error( + HttpStatus status, String code, String message, HttpServletRequest req) { + return ResponseEntity.status(status) + .body(ApiError.builder().code(code).message(message).path(req.getRequestURI()).build()); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index a216e9db3..fe5bee516 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -27,11 +29,16 @@ @AllArgsConstructor public class UpdateOperationRequest { - /** Operation row's UUID — the primary lookup key. */ - private String operationId; + /** + * Operation row's UUID. Required. Must match the {@code {id}} path variable on {@code POST + * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. + * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not + * see the URL. + */ + @NotBlank private String operationId; - /** Terminal outcome for this single operation. 
*/ - private HistoryStatus status; + /** Terminal outcome for this single operation. Required. */ + @NotNull private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index d1b4a5fe2..9d2dadb0e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -2,6 +2,7 @@ import java.util.Collections; import java.util.Map; +import javax.validation.constraints.NotBlank; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -19,11 +20,11 @@ @AllArgsConstructor public class UpsertTableStatsRequest { - /** Denormalized database name for display. */ - private String databaseName; + /** Denormalized database name for display. Required. */ + @NotBlank private String databaseName; - /** Denormalized table name for display. */ - private String tableName; + /** Denormalized table name for display. Required. */ + @NotBlank private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ private TableStatsPayload stats; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java new file mode 100644 index 000000000..97b63b06f --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -0,0 +1,216 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.MediaType; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.transaction.annotation.Transactional; + +/** + * Exercises the {@code GlobalExceptionHandler} contract across all three controllers — every + * non-2xx response carries an {@link com.linkedin.openhouse.optimizer.api.error.ApiError} body with + * {@code code}, {@code message}, and {@code path}. 
+ */ +@SpringBootTest +@AutoConfigureMockMvc +@ActiveProfiles("test") +@Transactional +class ControllerErrorHandlingTest { + + @Autowired MockMvc mockMvc; + @Autowired TableOperationsRepository operationsRepository; + + // --- /operations/{id}/update --- + + @Test + void updateOperation_notFound_returns404WithCode() throws Exception { + String id = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) + .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); + } + + @Test + void updateOperation_pathBodyMismatch_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String bodyId = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + bodyId + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("PATH_BODY_MISMATCH")); + } + + @Test + void updateOperation_missingStatus_returns400Validation() throws Exception { + String id = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + id + "\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("status"))); + } + + @Test + void updateOperation_missingOperationId_returns400Validation() throws Exception { + String pathId = 
UUID.randomUUID().toString(); + String body = "{\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect( + jsonPath("$.message").value(org.hamcrest.Matchers.containsString("operationId"))); + } + + @Test + void updateOperation_malformedJson_returns400Malformed() throws Exception { + String pathId = UUID.randomUUID().toString(); + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content("not json")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("MALFORMED_REQUEST")); + } + + // --- /operations/{id} --- + + @Test + void getTableOperation_notFound_returns404WithCode() throws Exception { + String id = UUID.randomUUID().toString(); + mockMvc + .perform(get("/v1/optimizer/operations/" + id)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); + } + + // --- /operations (list) --- + + @Test + void listOperations_missingLimit_returns400Missing() throws Exception { + mockMvc + .perform(get("/v1/optimizer/operations")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("MISSING_PARAMETER")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); + } + + @Test + void listOperations_badLimit_returns400TypeMismatch() throws Exception { + mockMvc + .perform(get("/v1/optimizer/operations").param("limit", "abc")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); + } + + @Test + void listOperations_badEnum_returns400() throws Exception { + mockMvc + 
.perform(get("/v1/optimizer/operations").param("status", "BOGUS").param("limit", "10")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + } + + // --- /stats/{tableUuid} --- + + @Test + void getTableStats_notFound_returns404WithCode() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc + .perform(get("/v1/optimizer/stats/" + uuid)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("STATS_NOT_FOUND")); + } + + // --- /stats (upsert) --- + + @Test + void upsertStats_missingRequiredField_returns400Validation() throws Exception { + String uuid = UUID.randomUUID().toString(); + String body = "{\"tableName\":\"tbl1\"}"; // databaseName missing + mockMvc + .perform( + put("/v1/optimizer/stats/" + uuid) + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect( + jsonPath("$.message").value(org.hamcrest.Matchers.containsString("databaseName"))); + } + + // --- /stats/{tableUuid}/history --- + + @Test + void getStatsHistory_badSince_returns400() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc + .perform( + get("/v1/optimizer/stats/" + uuid + "/history") + .param("since", "not-a-date") + .param("limit", "10")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + } + + // --- happy path sanity --- + + @Test + void updateOperation_happyPath_stillReturns201() throws Exception { + String id = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(java.time.Instant.now()) + .scheduledAt(java.time.Instant.now()) + .jobId("job-x") + 
.build()); + String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isCreated()) + .andExpect(jsonPath("$.status").value("SUCCESS")); + } +} From a37169d8a977fc4faec0212c14e855b0ab07d348 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 10:47:01 -0700 Subject: [PATCH 77/81] refactor(optimizer): simplify error handling per PR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comments on PR #596 — minimal diff, no bean validation, no per-framework-exception handlers, scoped to the optimizer controllers. - @RestControllerAdvice scoped to the three optimizer controllers via assignableTypes; no longer global. - Two handlers only: ResponseStatusException → ApiError with code = status.name() + reason as message; Exception → 500 INTERNAL_ERROR. Drop MethodArgumentNotValidException, MethodArgumentTypeMismatchException, MissingServletRequestParameterException, HttpMessageNotReadableException — Spring's defaults handle those. - Drop the "CODE: message" reason-parsing convention. - Drop @NotBlank / @NotNull on UpdateOperationRequest and UpsertTableStatsRequest; drop @Valid on controllers; drop spring-boot-starter-validation dep. Validate operationId / status server- side in TableOperationsController.updateOperation — loose-coupling so relaxing required fields later doesn't break wire callers. - String.format throughout; no message concatenation. - ControllerErrorHandlingTest trimmed from 13 cases to 7: only what the controllers actually own (404s, server-side validation on updateOperation, happy-path sanity). Framework-level 4xx left to Spring. 
Co-Authored-By: Claude Opus 4.7 --- services/optimizer/build.gradle | 1 - .../controller/TableOperationsController.java | 27 ++-- .../TableOperationsHistoryController.java | 3 +- .../api/controller/TableStatsController.java | 6 +- .../api/error/GlobalExceptionHandler.java | 103 ++++--------- .../api/spec/UpdateOperationRequest.java | 13 +- .../api/spec/UpsertTableStatsRequest.java | 9 +- .../ControllerErrorHandlingTest.java | 135 ++++-------------- 8 files changed, 86 insertions(+), 211 deletions(-) diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c208cf330..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -7,7 +7,6 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'org.springframework.boot:spring-boot-starter-validation:2.7.8' implementation 'mysql:mysql-connector-java:8.+' testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 25fd8ab6c..2f6f62e4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,12 +7,13 @@ import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; -import 
javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; @@ -38,15 +39,19 @@ public class TableOperationsController { */ @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @PathVariable String id, @Valid @RequestBody UpdateOperationRequest request) { - if (!id.equals(request.getOperationId())) { + @PathVariable String id, @RequestBody UpdateOperationRequest request) { + if (!StringUtils.hasText(request.getOperationId())) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "operationId is required"); + } + if (!Objects.equals(id, request.getOperationId())) { throw new ResponseStatusException( HttpStatus.BAD_REQUEST, - "PATH_BODY_MISMATCH: operationId in body ('" - + request.getOperationId() - + "') does not match path id ('" - + id - + "')"); + String.format( + "operationId in body (%s) does not match path id (%s)", + request.getOperationId(), id)); + } + if (request.getStatus() == null) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "status is required"); } return service .updateOperation(id, request.getStatus().toModel()) @@ -57,8 +62,7 @@ public ResponseEntity updateOperation( .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. 
*/ @@ -71,8 +75,7 @@ public ResponseEntity getTableOperation(@PathVariable String id .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 7a457d9cf..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -4,7 +4,6 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; -import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -27,7 +26,7 @@ public class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). 
*/ @PostMapping public ResponseEntity appendHistory( - @Valid @RequestBody TableOperationsHistory dto) { + @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 976d05e7f..ca8db4d51 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -35,7 +34,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @Valid @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } @@ -50,8 +49,7 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "STATS_NOT_FOUND: no stats for tableUuid '" + tableUuid + "'")); + HttpStatus.NOT_FOUND, String.format("no stats for tableUuid %s", tableUuid))); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java index 00baf5bd9..d47dd3911 
100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java @@ -1,75 +1,31 @@ package com.linkedin.openhouse.optimizer.api.error; +import com.linkedin.openhouse.optimizer.api.controller.TableOperationsController; +import com.linkedin.openhouse.optimizer.api.controller.TableOperationsHistoryController; +import com.linkedin.openhouse.optimizer.api.controller.TableStatsController; import javax.servlet.http.HttpServletRequest; import lombok.extern.slf4j.Slf4j; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import org.springframework.http.converter.HttpMessageNotReadableException; -import org.springframework.web.bind.MethodArgumentNotValidException; -import org.springframework.web.bind.MissingServletRequestParameterException; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.RestControllerAdvice; -import org.springframework.web.method.annotation.MethodArgumentTypeMismatchException; import org.springframework.web.server.ResponseStatusException; /** - * Maps framework + service exceptions to {@link ApiError} bodies with consistent HTTP status codes - * across every optimizer endpoint. - * - *

Codes used: {@code VALIDATION_ERROR}, {@code INVALID_PARAMETER}, {@code MISSING_PARAMETER}, - * {@code MALFORMED_REQUEST}, {@code OPERATION_NOT_FOUND}, {@code STATS_NOT_FOUND}, {@code - * INTERNAL_ERROR}. Endpoint-specific 404 codes are passed through via {@link - * ResponseStatusException}'s {@code reason} field. + * Scoped to the optimizer REST controllers. Two cases only: pass through any {@link + * ResponseStatusException} that a controller threw, and convert any other uncaught exception into a + * 500. Framework-level 4xx responses (missing query param, malformed body, etc.) are left to + * Spring's defaults — this advice intentionally does not blanket every possible exception type. */ @Slf4j -@RestControllerAdvice +@RestControllerAdvice( + assignableTypes = { + TableOperationsController.class, + TableOperationsHistoryController.class, + TableStatsController.class + }) public class GlobalExceptionHandler { - @ExceptionHandler(MethodArgumentNotValidException.class) - public ResponseEntity handleValidation( - MethodArgumentNotValidException e, HttpServletRequest req) { - String message = - e.getBindingResult().getFieldErrors().stream() - .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) - .reduce((a, b) -> a + "; " + b) - .orElse(e.getMessage()); - return error(HttpStatus.BAD_REQUEST, "VALIDATION_ERROR", message, req); - } - - @ExceptionHandler(MethodArgumentTypeMismatchException.class) - public ResponseEntity handleTypeMismatch( - MethodArgumentTypeMismatchException e, HttpServletRequest req) { - String type = e.getRequiredType() == null ? "?" 
: e.getRequiredType().getSimpleName(); - return error( - HttpStatus.BAD_REQUEST, - "INVALID_PARAMETER", - "Parameter '" - + e.getName() - + "' has invalid value '" - + e.getValue() - + "' (expected " - + type - + ")", - req); - } - - @ExceptionHandler(MissingServletRequestParameterException.class) - public ResponseEntity handleMissingParam( - MissingServletRequestParameterException e, HttpServletRequest req) { - return error( - HttpStatus.BAD_REQUEST, - "MISSING_PARAMETER", - "Required parameter '" + e.getParameterName() + "' is missing", - req); - } - - @ExceptionHandler(HttpMessageNotReadableException.class) - public ResponseEntity handleMalformedBody( - HttpMessageNotReadableException e, HttpServletRequest req) { - return error( - HttpStatus.BAD_REQUEST, "MALFORMED_REQUEST", "Request body is missing or malformed", req); - } - @ExceptionHandler(ResponseStatusException.class) public ResponseEntity handleResponseStatus( ResponseStatusException e, HttpServletRequest req) { @@ -77,26 +33,25 @@ public ResponseEntity handleResponseStatus( if (status == null) { status = HttpStatus.INTERNAL_SERVER_ERROR; } - String reason = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); - // Convention: when callers throw ResponseStatusException, they pack a "CODE: human message" - // into the reason. If no colon is present, the whole reason becomes the message and the code - // defaults to the status name (e.g. NOT_FOUND). - int sep = reason.indexOf(':'); - String code = sep > 0 ? reason.substring(0, sep).trim() : status.name(); - String message = sep > 0 ? reason.substring(sep + 1).trim() : reason; - return error(status, code, message, req); + String message = e.getReason() == null ? 
status.getReasonPhrase() : e.getReason(); + return ResponseEntity.status(status) + .body( + ApiError.builder() + .code(status.name()) + .message(message) + .path(req.getRequestURI()) + .build()); } @ExceptionHandler(Exception.class) public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { - log.warn("Unhandled exception on {}: {}", req.getRequestURI(), e.toString(), e); - return error( - HttpStatus.INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", "An unexpected error occurred", req); - } - - private static ResponseEntity error( - HttpStatus status, String code, String message, HttpServletRequest req) { - return ResponseEntity.status(status) - .body(ApiError.builder().code(code).message(message).path(req.getRequestURI()).build()); + log.warn(String.format("Unhandled exception on %s", req.getRequestURI()), e); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) + .body( + ApiError.builder() + .code("INTERNAL_ERROR") + .message("An unexpected error occurred") + .path(req.getRequestURI()) + .build()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index fe5bee516..fcae718ad 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -1,7 +1,5 @@ package com.linkedin.openhouse.optimizer.api.spec; -import javax.validation.constraints.NotBlank; -import javax.validation.constraints.NotNull; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -30,15 +28,16 @@ public class UpdateOperationRequest { /** - * Operation row's UUID. Required. Must match the {@code {id}} path variable on {@code POST + * Operation row's UUID. 
Must match the {@code {id}} path variable on {@code POST * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not - * see the URL. + * see the URL. Validated server-side (no bean-validation annotation) so that future relaxation + * does not break clients on the wire contract. */ - @NotBlank private String operationId; + private String operationId; - /** Terminal outcome for this single operation. Required. */ - @NotNull private HistoryStatus status; + /** Terminal outcome for this single operation. Validated server-side. */ + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 9d2dadb0e..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -2,7 +2,6 @@ import java.util.Collections; import java.util.Map; -import javax.validation.constraints.NotBlank; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -20,11 +19,11 @@ @AllArgsConstructor public class UpsertTableStatsRequest { - /** Denormalized database name for display. Required. */ - @NotBlank private String databaseName; + /** Denormalized database name for display. */ + private String databaseName; - /** Denormalized table name for display. Required. */ - @NotBlank private String tableName; + /** Denormalized table name for display. */ + private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ private TableStatsPayload stats; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java index 97b63b06f..59d793441 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -2,13 +2,13 @@ import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.time.Instant; import java.util.UUID; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -20,9 +20,9 @@ import org.springframework.transaction.annotation.Transactional; /** - * Exercises the {@code GlobalExceptionHandler} contract across all three controllers — every - * non-2xx response carries an {@link com.linkedin.openhouse.optimizer.api.error.ApiError} body with - * {@code code}, {@code message}, and {@code path}. + * Exercises what the controllers own: server-side validation on {@code updateOperation}, 404 on + * missing rows, and the {@code ApiError} body shape. Framework-level 4xx (missing query param, + * malformed JSON, etc.) is left to Spring's defaults and not asserted here. 
*/ @SpringBootTest @AutoConfigureMockMvc @@ -33,19 +33,17 @@ class ControllerErrorHandlingTest { @Autowired MockMvc mockMvc; @Autowired TableOperationsRepository operationsRepository; - // --- /operations/{id}/update --- - @Test - void updateOperation_notFound_returns404WithCode() throws Exception { + void updateOperation_notFound_returns404WithApiError() throws Exception { String id = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc .perform( post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.code").value("NOT_FOUND")) .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); } @@ -54,32 +52,21 @@ void updateOperation_notFound_returns404WithCode() throws Exception { void updateOperation_pathBodyMismatch_returns400() throws Exception { String pathId = UUID.randomUUID().toString(); String bodyId = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + bodyId + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", bodyId); mockMvc .perform( post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("PATH_BODY_MISMATCH")); - } - - @Test - void updateOperation_missingStatus_returns400Validation() throws Exception { - String id = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + id + "\"}"; - mockMvc - .perform( - post("/v1/optimizer/operations/" + id + "/update") - .contentType(MediaType.APPLICATION_JSON) - .content(body)) - 
.andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("status"))); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect( + jsonPath("$.message") + .value(org.hamcrest.Matchers.containsString("does not match path id"))); } @Test - void updateOperation_missingOperationId_returns400Validation() throws Exception { + void updateOperation_missingOperationId_returns400() throws Exception { String pathId = UUID.randomUUID().toString(); String body = "{\"status\":\"SUCCESS\"}"; mockMvc @@ -88,107 +75,43 @@ void updateOperation_missingOperationId_returns400Validation() throws Exception .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect( - jsonPath("$.message").value(org.hamcrest.Matchers.containsString("operationId"))); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect(jsonPath("$.message").value("operationId is required")); } @Test - void updateOperation_malformedJson_returns400Malformed() throws Exception { - String pathId = UUID.randomUUID().toString(); + void updateOperation_missingStatus_returns400() throws Exception { + String id = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\"}", id); mockMvc .perform( - post("/v1/optimizer/operations/" + pathId + "/update") + post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) - .content("not json")) + .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("MALFORMED_REQUEST")); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect(jsonPath("$.message").value("status is required")); } - // --- /operations/{id} --- - @Test - void getTableOperation_notFound_returns404WithCode() throws Exception { + void 
getTableOperation_notFound_returns404WithApiError() throws Exception { String id = UUID.randomUUID().toString(); mockMvc .perform(get("/v1/optimizer/operations/" + id)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.code").value("NOT_FOUND")) .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); } - // --- /operations (list) --- - - @Test - void listOperations_missingLimit_returns400Missing() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("MISSING_PARAMETER")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); - } - - @Test - void listOperations_badLimit_returns400TypeMismatch() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations").param("limit", "abc")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); - } - - @Test - void listOperations_badEnum_returns400() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations").param("status", "BOGUS").param("limit", "10")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); - } - - // --- /stats/{tableUuid} --- - @Test - void getTableStats_notFound_returns404WithCode() throws Exception { + void getTableStats_notFound_returns404WithApiError() throws Exception { String uuid = UUID.randomUUID().toString(); mockMvc .perform(get("/v1/optimizer/stats/" + uuid)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("STATS_NOT_FOUND")); - } - - // --- /stats (upsert) --- - - @Test - void upsertStats_missingRequiredField_returns400Validation() throws Exception { - String uuid = UUID.randomUUID().toString(); - String body = "{\"tableName\":\"tbl1\"}"; // databaseName missing - 
mockMvc - .perform( - put("/v1/optimizer/stats/" + uuid) - .contentType(MediaType.APPLICATION_JSON) - .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect( - jsonPath("$.message").value(org.hamcrest.Matchers.containsString("databaseName"))); - } - - // --- /stats/{tableUuid}/history --- - - @Test - void getStatsHistory_badSince_returns400() throws Exception { - String uuid = UUID.randomUUID().toString(); - mockMvc - .perform( - get("/v1/optimizer/stats/" + uuid + "/history") - .param("since", "not-a-date") - .param("limit", "10")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + .andExpect(jsonPath("$.code").value("NOT_FOUND")); } - // --- happy path sanity --- - @Test void updateOperation_happyPath_stillReturns201() throws Exception { String id = UUID.randomUUID().toString(); @@ -200,11 +123,11 @@ void updateOperation_happyPath_stillReturns201() throws Exception { .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) - .createdAt(java.time.Instant.now()) - .scheduledAt(java.time.Instant.now()) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) .jobId("job-x") .build()); - String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc .perform( post("/v1/optimizer/operations/" + id + "/update") From 6416c9dfce21ed02561d8ab104802eb1b760d043 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 11:41:38 -0700 Subject: [PATCH 78/81] refactor(optimizer): drop GlobalExceptionHandler + ApiError; use Spring defaults The custom advice was producing a body shape (ApiError {code, message, path}) that duplicated Spring Boot's default error JSON ({timestamp, status, error, message, path}). 
The only substantive difference was that Spring Boot 2.7 omits the `message` field by default. Replace the custom advice with a one-line config: server.error.include-message=always Now ResponseStatusException reasons (e.g. "no operation with id X") reach the caller via Spring's default error body, no custom code. - Delete api/error/GlobalExceptionHandler.java - Delete api/error/ApiError.java - application.properties: server.error.include-message=always - ControllerErrorHandlingTest assertions trimmed to status-code-only (MockMvc does not trigger Spring's error-dispatch to BasicErrorController, so body assertions cannot be made in tests even though the body is populated on real HTTP requests). Co-Authored-By: Claude Opus 4.7 --- .../optimizer/api/error/ApiError.java | 27 --------- .../api/error/GlobalExceptionHandler.java | 57 ------------------- .../src/main/resources/application.properties | 5 ++ .../ControllerErrorHandlingTest.java | 47 ++++++--------- 4 files changed, 21 insertions(+), 115 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java deleted file mode 100644 index 9018e1bbe..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.error; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Uniform error response body returned by every optimizer endpoint on a non-2xx status. - * - *

<p>Shape: - * - * <ul> - * <li>{@code code} — machine-readable identifier (e.g. {@code OPERATION_NOT_FOUND}). - * <li>{@code message} — human-readable explanation. - * <li>{@code path} — the request URI that triggered the error. - * </ul>
- */ -@Data -@Builder -@AllArgsConstructor -@NoArgsConstructor -public class ApiError { - private String code; - private String message; - private String path; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java deleted file mode 100644 index d47dd3911..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.error; - -import com.linkedin.openhouse.optimizer.api.controller.TableOperationsController; -import com.linkedin.openhouse.optimizer.api.controller.TableOperationsHistoryController; -import com.linkedin.openhouse.optimizer.api.controller.TableStatsController; -import javax.servlet.http.HttpServletRequest; -import lombok.extern.slf4j.Slf4j; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.ExceptionHandler; -import org.springframework.web.bind.annotation.RestControllerAdvice; -import org.springframework.web.server.ResponseStatusException; - -/** - * Scoped to the optimizer REST controllers. Two cases only: pass through any {@link - * ResponseStatusException} that a controller threw, and convert any other uncaught exception into a - * 500. Framework-level 4xx responses (missing query param, malformed body, etc.) are left to - * Spring's defaults — this advice intentionally does not blanket every possible exception type. 
- */ -@Slf4j -@RestControllerAdvice( - assignableTypes = { - TableOperationsController.class, - TableOperationsHistoryController.class, - TableStatsController.class - }) -public class GlobalExceptionHandler { - - @ExceptionHandler(ResponseStatusException.class) - public ResponseEntity handleResponseStatus( - ResponseStatusException e, HttpServletRequest req) { - HttpStatus status = HttpStatus.resolve(e.getStatus().value()); - if (status == null) { - status = HttpStatus.INTERNAL_SERVER_ERROR; - } - String message = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); - return ResponseEntity.status(status) - .body( - ApiError.builder() - .code(status.name()) - .message(message) - .path(req.getRequestURI()) - .build()); - } - - @ExceptionHandler(Exception.class) - public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { - log.warn(String.format("Unhandled exception on %s", req.getRequestURI()), e); - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) - .body( - ApiError.builder() - .code("INTERNAL_ERROR") - .message("An unexpected error occurred") - .path(req.getRequestURI()) - .build()); - } -} diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index e78745d00..e7f082b47 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -18,3 +18,8 @@ spring.datasource.hikari.maximum-pool-size=20 management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true + +# Include ResponseStatusException.reason in the default error response body. Without this, Spring +# Boot 2.7 omits the `message` field, and the human-readable detail from a thrown +# ResponseStatusException never reaches the caller. 
+server.error.include-message=always diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java index 59d793441..b9c8dc3dc 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -20,9 +20,12 @@ import org.springframework.transaction.annotation.Transactional; /** - * Exercises what the controllers own: server-side validation on {@code updateOperation}, 404 on - * missing rows, and the {@code ApiError} body shape. Framework-level 4xx (missing query param, - * malformed JSON, etc.) is left to Spring's defaults and not asserted here. + * Exercises what the controllers own: server-side validation on {@code updateOperation} (path/body + * mismatch, missing fields) and 404s on missing rows. Assertions are status-code-only: MockMvc does + * not trigger Spring's error-dispatch to {@code BasicErrorController}, so the response body of a + * {@link org.springframework.web.server.ResponseStatusException} is empty in tests even though it + * is populated in production (with {@code server.error.include-message=always}). Framework-level + * 4xx (missing query param, malformed JSON, etc.) is left to Spring's defaults and not asserted. 
*/ @SpringBootTest @AutoConfigureMockMvc @@ -34,7 +37,7 @@ class ControllerErrorHandlingTest { @Autowired TableOperationsRepository operationsRepository; @Test - void updateOperation_notFound_returns404WithApiError() throws Exception { + void updateOperation_notFound_returns404() throws Exception { String id = UUID.randomUUID().toString(); String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc @@ -42,10 +45,7 @@ void updateOperation_notFound_returns404WithApiError() throws Exception { post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) - .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); + .andExpect(status().isNotFound()); } @Test @@ -58,11 +58,7 @@ void updateOperation_pathBodyMismatch_returns400() throws Exception { post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect( - jsonPath("$.message") - .value(org.hamcrest.Matchers.containsString("does not match path id"))); + .andExpect(status().isBadRequest()); } @Test @@ -74,9 +70,7 @@ void updateOperation_missingOperationId_returns400() throws Exception { post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect(jsonPath("$.message").value("operationId is required")); + .andExpect(status().isBadRequest()); } @Test @@ -88,32 +82,23 @@ void updateOperation_missingStatus_returns400() throws Exception { post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - 
.andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect(jsonPath("$.message").value("status is required")); + .andExpect(status().isBadRequest()); } @Test - void getTableOperation_notFound_returns404WithApiError() throws Exception { + void getTableOperation_notFound_returns404() throws Exception { String id = UUID.randomUUID().toString(); - mockMvc - .perform(get("/v1/optimizer/operations/" + id)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")) - .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); + mockMvc.perform(get("/v1/optimizer/operations/" + id)).andExpect(status().isNotFound()); } @Test - void getTableStats_notFound_returns404WithApiError() throws Exception { + void getTableStats_notFound_returns404() throws Exception { String uuid = UUID.randomUUID().toString(); - mockMvc - .perform(get("/v1/optimizer/stats/" + uuid)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")); + mockMvc.perform(get("/v1/optimizer/stats/" + uuid)).andExpect(status().isNotFound()); } @Test - void updateOperation_happyPath_stillReturns201() throws Exception { + void updateOperation_happyPath_returns201() throws Exception { String id = UUID.randomUUID().toString(); operationsRepository.save( TableOperationsRow.builder() From bbef386ae56acf32ae9d8d31be1a7b50a2720c1c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 12:16:57 -0700 Subject: [PATCH 79/81] refactor(optimizer): revert UpdateOperationRequest doc edits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review — no change needed on this file. The path/body validation lives in the controller; the DTO carries the same fields as before with the existing javadoc. 
Co-Authored-By: Claude Opus 4.7 --- .../optimizer/api/spec/UpdateOperationRequest.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index fcae718ad..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -27,16 +27,10 @@ @AllArgsConstructor public class UpdateOperationRequest { - /** - * Operation row's UUID. Must match the {@code {id}} path variable on {@code POST - * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. - * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not - * see the URL. Validated server-side (no bean-validation annotation) so that future relaxation - * does not break clients on the wire contract. - */ + /** Operation row's UUID — the primary lookup key. */ private String operationId; - /** Terminal outcome for this single operation. Validated server-side. */ + /** Terminal outcome for this single operation. */ private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ From 02bbc5c5af7f1d984fd7360758fc058873818bb7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker <34144575+mkuchenbecker@users.noreply.github.com> Date: Fri, 22 May 2026 12:18:47 -0700 Subject: [PATCH 80/81] (wip) feat(optimizer): basic error-code handling across controllers (#596) Error handling. 
--- .../controller/TableOperationsController.java | 52 +++++--- .../TableOperationsHistoryController.java | 7 +- .../api/controller/TableStatsController.java | 23 ++-- .../service/OptimizerDataService.java | 16 ++- .../service/OptimizerDataServiceImpl.java | 16 +-- .../src/main/resources/application.properties | 7 +- .../ControllerErrorHandlingTest.java | 124 ++++++++++++++++++ .../service/OptimizerDataServiceImplTest.java | 3 +- 8 files changed, 206 insertions(+), 42 deletions(-) create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index c28002bf7..2f6f62e4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,11 +7,13 @@ import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; @@ -19,6 +21,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import 
org.springframework.web.server.ResponseStatusException; /** REST controller for {@code table_operations}. */ @RestController @@ -29,23 +32,37 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report an update to an operation. The body carries the {@code operationId} the caller is - * updating along with its terminal status. The backend looks up the operation row, writes a - * history entry with the operation's table metadata, and returns 201 Created with the history - * row, or 404 if the operation does not exist. + * Report an update to an operation. {@code id} comes from the URL; the body's {@code operationId} + * must match (the controller rejects mismatched requests with 400). The backend looks up the + * operation row, writes a history entry with the operation's table metadata, and returns 201 + * Created with the history row, or 404 if the operation does not exist. */ - @PostMapping("/update") + @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @RequestBody UpdateOperationRequest request) { + @PathVariable String id, @RequestBody UpdateOperationRequest request) { + if (!StringUtils.hasText(request.getOperationId())) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "operationId is required"); + } + if (!Objects.equals(id, request.getOperationId())) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + String.format( + "operationId in body (%s) does not match path id (%s)", + request.getOperationId(), id)); + } + if (request.getStatus() == null) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "status is required"); + } return service - .updateOperation( - request.getOperationId(), - request.getStatus() == null ? 
null : request.getStatus().toModel()) + .updateOperation(id, request.getStatus().toModel()) .map( history -> ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(history))) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */ @@ -55,12 +72,15 @@ public ResponseEntity getTableOperation(@PathVariable String id .getTableOperation(id) .map(TableOperations::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** - * List operations matching the given filters. All parameters are optional — omit all to return - * every row. + * List operations matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. 
*/ @GetMapping public ResponseEntity> listTableOperations( @@ -68,7 +88,8 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableOperations( @@ -76,7 +97,8 @@ public ResponseEntity> listTableOperations( Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableOperations::fromModel) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 36c422623..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -31,10 +31,13 @@ public ResponseEntity appendHistory( .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } - /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ + /** + * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code + * limit} is required. 
+ */ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( - @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { + @PathVariable String tableUuid, @RequestParam int limit) { List result = service.getHistory(tableUuid, limit).stream() .map(TableOperationsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 7cb745250..ca8db4d51 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -9,6 +9,7 @@ import java.util.Optional; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; @@ -17,6 +18,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for managing per-table stats in the optimizer DB. */ @RestController @@ -44,24 +46,29 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .getTableStats(tableUuid) .map(TableStats::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no stats for tableUuid %s", tableUuid))); } /** - * List stats rows matching the given filters. All parameters are optional — omit all to return - * every row. 
+ * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableStats::fromModel) .collect(Collectors.toList()); @@ -69,14 +76,14 @@ public ResponseEntity> listTableStats( } /** - * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by - * {@code since} (inclusive) and cap at {@code limit} rows. + * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} + * rows. Optional {@code since} filter (inclusive). {@code limit} is required. 
*/ @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, - @RequestParam(defaultValue = "100") int limit) { + @RequestParam int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() .map(TableStatsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 0529d3608..c20ae7bf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -23,15 +23,16 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List operations matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableOperations( Optional operationType, Optional status, Optional databaseName, Optional tableName, - Optional tableUuid); + Optional tableUuid, + int limit); /** * Update an operation by writing a history entry. Looks up the operation row by {@code @@ -60,11 +61,14 @@ List listTableOperations( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List stats rows matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. 
*/ List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid); + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 4f820e1b8..29fd0eeee 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,7 +19,6 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -40,9 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - @Value("${optimizer.repo.default-limit:10000}") - private int defaultLimit; - // --- TableOperations --- @Override @@ -51,7 +47,8 @@ public List listTableOperations( Optional status, Optional databaseName, Optional tableName, - Optional tableUuid) { + Optional tableUuid, + int limit) { return operationsRepository .find( operationType.map(OperationTypeDto::toDb), @@ -61,7 +58,7 @@ public List listTableOperations( tableName, Optional.empty(), Optional.empty(), - PageRequest.of(0, defaultLimit)) + PageRequest.of(0, limit)) .stream() .map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -137,8 +134,11 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - Optional 
databaseName, Optional tableName, Optional tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit) { + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, limit)) .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 1b7eb1a40..e7f082b47 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -16,7 +16,10 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 -optimizer.repo.default-limit=10000 - management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true + +# Include ResponseStatusException.reason in the default error response body. Without this, Spring +# Boot 2.7 omits the `message` field, and the human-readable detail from a thrown +# ResponseStatusException never reaches the caller. 
+server.error.include-message=always diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java new file mode 100644 index 000000000..b9c8dc3dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -0,0 +1,124 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.time.Instant; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.MediaType; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.transaction.annotation.Transactional; + +/** + * Exercises what the controllers own: server-side validation on {@code updateOperation} (path/body + * mismatch, missing fields) and 404s on missing rows. 
Assertions are status-code-only: MockMvc does + * not trigger Spring's error-dispatch to {@code BasicErrorController}, so the response body of a + * {@link org.springframework.web.server.ResponseStatusException} is empty in tests even though it + * is populated in production (with {@code server.error.include-message=always}). Framework-level + * 4xx (missing query param, malformed JSON, etc.) is left to Spring's defaults and not asserted. + */ +@SpringBootTest +@AutoConfigureMockMvc +@ActiveProfiles("test") +@Transactional +class ControllerErrorHandlingTest { + + @Autowired MockMvc mockMvc; + @Autowired TableOperationsRepository operationsRepository; + + @Test + void updateOperation_notFound_returns404() throws Exception { + String id = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isNotFound()); + } + + @Test + void updateOperation_pathBodyMismatch_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String bodyId = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", bodyId); + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void updateOperation_missingOperationId_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String body = "{\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void updateOperation_missingStatus_returns400() throws Exception { + String id = UUID.randomUUID().toString(); + String body = 
String.format("{\"operationId\":\"%s\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void getTableOperation_notFound_returns404() throws Exception { + String id = UUID.randomUUID().toString(); + mockMvc.perform(get("/v1/optimizer/operations/" + id)).andExpect(status().isNotFound()); + } + + @Test + void getTableStats_notFound_returns404() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc.perform(get("/v1/optimizer/stats/" + uuid)).andExpect(status().isNotFound()); + } + + @Test + void updateOperation_happyPath_returns201() throws Exception { + String id = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("job-x") + .build()); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isCreated()) + .andExpect(jsonPath("$.status").value("SUCCESS")); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8db14c4d6..2a3c1e676 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -165,7 +165,8 @@ void 
listTableOperations_filtersByOperationTypeAndStatus() { Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), - Optional.empty())) + Optional.empty(), + 100)) .extracting(op -> op.getId()) .containsExactly(pendingId); } From 6ef7964017101ce7ee028ca3c93e66f023856d76 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 13:55:14 -0700 Subject: [PATCH 81/81] docs(optimizer): add @ApiResponses to controllers for OpenAPI spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review on PR #531 — each endpoint lists the HTTP response codes it actually returns, in the same "Resource ACTION: STATUS" style used by services/tables. Codes per endpoint: - POST /operations/{id}/update — 201, 400, 404 - GET /operations/{id} — 200, 404 - GET /operations — 200, 400 - POST /operations-history — 201 - GET /operations-history/{u} — 200, 400 - PUT /stats/{u} — 200 - GET /stats/{u} — 200, 404 - GET /stats — 200, 400 - GET /stats/{u}/history — 200, 400 Annotations only — no runtime behavior change, no new tests required. swagger-annotations 2.1.11 is already on the optimizer classpath via openhouse.springboot-conventions. 
Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 18 ++++++++++++++++++ .../TableOperationsHistoryController.java | 11 +++++++++++ .../api/controller/TableStatsController.java | 18 ++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 2f6f62e4b..2ee40802f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -6,6 +6,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -37,6 +39,12 @@ public class TableOperationsController { * operation row, writes a history entry with the operation's table metadata, and returns 201 * Created with the history row, or 404 if the operation does not exist. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "201", description = "Operation UPDATE: CREATED"), + @ApiResponse(responseCode = "400", description = "Operation UPDATE: BAD_REQUEST"), + @ApiResponse(responseCode = "404", description = "Operation UPDATE: NOT_FOUND") + }) @PostMapping("/{id}/update") public ResponseEntity updateOperation( @PathVariable String id, @RequestBody UpdateOperationRequest request) { @@ -66,6 +74,11 @@ public ResponseEntity updateOperation( } /** Fetch a single operation row by its ID, regardless of status. 
Returns 404 if not found. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Operation GET: OK"), + @ApiResponse(responseCode = "404", description = "Operation GET: NOT_FOUND") + }) @GetMapping("/{id}") public ResponseEntity getTableOperation(@PathVariable String id) { return service @@ -82,6 +95,11 @@ public ResponseEntity getTableOperation(@PathVariable String id * List operations matching the given filters, capped at {@code limit} rows. Every filter is * optional; {@code limit} is required so callers always state how much they want back. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Operation SEARCH: OK"), + @ApiResponse(responseCode = "400", description = "Operation SEARCH: BAD_REQUEST") + }) @GetMapping public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationType operationType, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 9a1b6d303..873d51d2e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -2,6 +2,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.util.List; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -24,6 +26,10 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; /** Append a completed-job result. 
Called by the SparkJob after each run (success or failure). */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "201", description = "OperationsHistory CREATE: CREATED") + }) @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistory dto) { @@ -35,6 +41,11 @@ public ResponseEntity appendHistory( * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code * limit} is required. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "OperationsHistory GET: OK"), + @ApiResponse(responseCode = "400", description = "OperationsHistory GET: BAD_REQUEST") + }) @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam int limit) { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index ca8db4d51..b119dd1c7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -4,6 +4,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistory; import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -32,6 +34,7 @@ public class TableStatsController { * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every * Iceberg commit. Idempotent. 
*/ + @ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Stats PUT: OK")}) @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { @@ -40,6 +43,11 @@ public ResponseEntity upsertTableStats( } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Stats GET: OK"), + @ApiResponse(responseCode = "404", description = "Stats GET: NOT_FOUND") + }) @GetMapping("/{tableUuid}") public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service @@ -56,6 +64,11 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is * optional; {@code limit} is required so callers always state how much they want back. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Stats SEARCH: OK"), + @ApiResponse(responseCode = "400", description = "Stats SEARCH: BAD_REQUEST") + }) @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @@ -79,6 +92,11 @@ public ResponseEntity> listTableStats( * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} * rows. Optional {@code since} filter (inclusive). {@code limit} is required. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "StatsHistory GET: OK"), + @ApiResponse(responseCode = "400", description = "StatsHistory GET: BAD_REQUEST") + }) @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid,