From 44b8af9366eeb63bcfc0e38c8929421bcd8ccb95 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 15:30:11 -0700 Subject: [PATCH 1/3] test(amber): add unit test coverage for LogicalLink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin both construction paths and the production Jackson behavior: - Primary case-class constructor: four fields exposed as constructed. - Secondary @JsonCreator constructor: wraps raw String op ids in OperatorIdentity (the Jackson deserialization path used for user-saved workflow JSON). - Case-class equals / hashCode across the four fields, including field-discriminating tests (fromOpId, toPortId.internal) and acceptance of self-loop links (same fromOpId / toOpId, distinct ports — higher layers reject cycles, the data type does not). - @JsonCreator path accepts dashes / dots / digits and the empty string without normalization (no validation in the data type). - Production-mapper Jackson coverage via JSONUtils.objectMapper: - treeToValue with raw String op-id JSON dispatches to the @JsonCreator string overload and produces the expected fields. - @JsonProperty pins the on-the-wire key names (`fromOpId` / `toOpId` / `fromPortId` / `toPortId`). - writeValueAsString → readValue is NOT round-trippable: the string overload can't accept the object-shape OperatorIdentity that writeValueAsString emits. Asymmetry pinned so a future fix (object @JsonCreator overload or custom @JsonDeserialize) flips this on purpose. - Missing string op-id fields silently default to OperatorIdentity(null) — no validation in the @JsonCreator path; future hardening should fail this on purpose. Closes #4955 --- .../texera/workflow/LogicalLinkSpec.scala | 221 ++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala new file mode 100644 index 00000000000..aa8576f1c98 --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.workflow + +import com.fasterxml.jackson.databind.exc.MismatchedInputException +import com.fasterxml.jackson.databind.node.ObjectNode +import org.apache.texera.amber.core.virtualidentity.OperatorIdentity +import org.apache.texera.amber.core.workflow.PortIdentity +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec + +class LogicalLinkSpec extends AnyFlatSpec { + + // --------------------------------------------------------------------------- + // Primary constructor + case-class semantics + // --------------------------------------------------------------------------- + + "LogicalLink primary constructor" should "expose the four fields it was constructed with" in { + val link = LogicalLink( + fromOpId = OperatorIdentity("op-A"), + fromPortId = PortIdentity(0), + toOpId = OperatorIdentity("op-B"), + toPortId = PortIdentity(1, internal = true) + ) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.fromPortId == PortIdentity(0)) + assert(link.toOpId == OperatorIdentity("op-B")) + assert(link.toPortId == PortIdentity(1, internal = true)) + } + + "LogicalLink case-class equality" should "use structural equality across all four fields" in { + val a = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + val b = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + assert(a == b) + assert(a.hashCode == b.hashCode) + } + + it should "distinguish links that differ only in fromOpId" in { + val a = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + val b = + LogicalLink(OperatorIdentity("z"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + assert(a != b) + } + + it should "distinguish links that differ only in toPortId.internal" in { + val a = LogicalLink( + OperatorIdentity("x"), + PortIdentity(0), + OperatorIdentity("y"), + PortIdentity(1, internal = false) + ) + val b = LogicalLink( + OperatorIdentity("x"), + PortIdentity(0), + OperatorIdentity("y"), + PortIdentity(1, internal = true) + ) + assert(a != b) + } + + it should "consider a self-loop link well-formed (same fromOpId / toOpId, distinct ports)" in { + // Self-loops aren't structurally invalid at the LogicalLink level — + // higher layers reject cycles, but the data type allows fromOpId == + // toOpId. Pin so a future == check on construction breaks this on + // purpose. + val selfLoop = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-A"), + PortIdentity(1) + ) + assert(selfLoop.fromOpId == selfLoop.toOpId) + } + + // --------------------------------------------------------------------------- + // Secondary @JsonCreator constructor (string opId variant) + // --------------------------------------------------------------------------- + + "LogicalLink secondary @JsonCreator constructor" should "wrap raw String op ids in OperatorIdentity" in { + val link = new LogicalLink( + fromOpId = "op-A", + fromPortId = PortIdentity(0), + toOpId = "op-B", + toPortId = PortIdentity(1) + ) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.toOpId == OperatorIdentity("op-B")) + // Equal to a link built via the primary constructor. + assert( + link == LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + ) + } + + it should "accept identifiers containing dashes / dots / digits (no normalization)" in { + val link = new LogicalLink("my.op-1", PortIdentity(0), "my.op-2", PortIdentity(1)) + assert(link.fromOpId == OperatorIdentity("my.op-1")) + assert(link.toOpId == OperatorIdentity("my.op-2")) + } + + it should "accept the empty string as an op id (no validation in the data type)" in { + // Pin: the secondary constructor does not validate; an empty string + // wraps into `OperatorIdentity("")`. A future change adding non-empty + // validation should fail this test on purpose. + val link = new LogicalLink("", PortIdentity(0), "", PortIdentity(1)) + assert(link.fromOpId == OperatorIdentity("")) + assert(link.toOpId == OperatorIdentity("")) + } + + // --------------------------------------------------------------------------- + // Jackson round-trip (production objectMapper) + // --------------------------------------------------------------------------- + // + // These tests use the same `JSONUtils.objectMapper` that production uses + // to read user-saved workflow JSON, so a regression in the Jackson + // wiring (annotations, default-Scala-module config) surfaces here. + + "LogicalLink Jackson deserialization" should + "deserialize fromOpId / toOpId from raw String values via the secondary @JsonCreator constructor" in { + // Build the JSON by hand to mimic a user-saved workflow file where + // `fromOpId` and `toOpId` are written as plain strings (the only shape + // production actually receives, since the frontend emits them as + // strings). Jackson dispatches to the @JsonCreator string-overload + // constructor. + val node = objectMapper.createObjectNode() + node.put("fromOpId", "op-A") + node.set[ObjectNode]("fromPortId", objectMapper.valueToTree(PortIdentity(0))) + node.put("toOpId", "op-B") + node.set[ObjectNode]("toPortId", objectMapper.valueToTree(PortIdentity(1))) + val link = objectMapper.treeToValue(node, classOf[LogicalLink]) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.toOpId == OperatorIdentity("op-B")) + assert(link.fromPortId == PortIdentity(0)) + assert(link.toPortId == PortIdentity(1)) + } + + it should "use the documented `fromOpId` / `toOpId` JSON field names on serialization" in { + // The `@JsonProperty` annotations pin the on-the-wire key names, which + // saved workflow files depend on. A renamed Scala field would + // silently break a project's existing JSON if these annotations were + // removed. + val link = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val tree = objectMapper.valueToTree[com.fasterxml.jackson.databind.JsonNode](link) + assert(tree.has("fromOpId")) + assert(tree.has("toOpId")) + assert(tree.has("fromPortId")) + assert(tree.has("toPortId")) + } + + it should "NOT round-trip through writeValueAsString (the @JsonCreator string overload is incompatible with the object-shape OperatorIdentity that writeValueAsString emits)" in { + // This is a real asymmetry worth pinning: production reads user-saved + // workflow JSON where `fromOpId`/`toOpId` are plain strings, but + // `objectMapper.writeValueAsString` writes OperatorIdentity as + // `{"id":"op-A"}` (the case-class object form). Re-reading that + // emitted JSON fails because Jackson dispatches on the @JsonCreator + // string-overload, which can't accept an object for fromOpId. + // A future fix that adds a third @JsonCreator (object overload) or + // a custom @JsonDeserialize on `fromOpId`/`toOpId` will turn this + // into a passing round-trip and break this assertion on purpose. + val original = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val json = objectMapper.writeValueAsString(original) + // Confirm writeValueAsString emits the object form. + assert(json.contains("\"fromOpId\":{\"id\":\"op-A\"}")) + // Re-reading fails because the @JsonCreator string overload can't + // accept an object for fromOpId. + intercept[MismatchedInputException] { + objectMapper.readValue(json, classOf[LogicalLink]) + } + } + + it should "silently default missing string op-id fields to OperatorIdentity(null) (no validation in the @JsonCreator path)" in { + // Pin: omitting `fromOpId` / `toOpId` does not throw — Jackson invokes + // the @JsonCreator with `null` for the missing String args, and + // `OperatorIdentity(null)` is a perfectly constructable case-class + // instance. A future change adding a non-null check at construction + // time should fail this on purpose. + val empty = objectMapper.createObjectNode() + val link = objectMapper.treeToValue(empty, classOf[LogicalLink]) + assert(link.fromOpId == OperatorIdentity(null.asInstanceOf[String])) + assert(link.toOpId == OperatorIdentity(null.asInstanceOf[String])) + // PortIdentity fields default to null too (the @JsonCreator does not + // supply scalapb defaults for missing object fields — Jackson passes + // null for the typed PortIdentity arg). + assert(link.fromPortId == null) + assert(link.toPortId == null) + } +} From 24b7e37c95607299fbe67251e4b14a1a034f5914 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 15:44:19 -0700 Subject: [PATCH 2/3] test(amber): tighten LogicalLinkSpec Jackson assertions per Copilot review Address Copilot feedback on #4956: - Replace the brittle `json.contains(...)` raw-string check on the writeValueAsString output with `objectMapper.readTree(json)` parse + structural assertions (`tree.path("fromOpId").isObject` / `path("id").asText() == "op-A"`). No longer depends on exact key ordering or escaping. - Drop the `node.set[ObjectNode](..., objectMapper.valueToTree(...))` pattern. The `[ObjectNode]` type parameter forces a runtime cast that assumes PortIdentity always serializes to an ObjectNode, which could throw ClassCastException if PortIdentity's serialization changes. Use `node.set("fromPortId", objectMapper.valueToTree[ JsonNode](...))` instead. - Replace `OperatorIdentity(null.asInstanceOf[String])` with the cleaner `link.fromOpId.id == null` assertion. Same intent (verify the op-id field is null after deserialization with missing JSON field), no `null.asInstanceOf[String]` noise. --- .../texera/workflow/LogicalLinkSpec.scala | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index aa8576f1c98..c860456e6e2 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -19,8 +19,8 @@ package org.apache.texera.workflow +import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.exc.MismatchedInputException -import com.fasterxml.jackson.databind.node.ObjectNode import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity import org.apache.texera.amber.util.JSONUtils.objectMapper @@ -148,9 +148,9 @@ class LogicalLinkSpec extends AnyFlatSpec { // constructor. val node = objectMapper.createObjectNode() node.put("fromOpId", "op-A") - node.set[ObjectNode]("fromPortId", objectMapper.valueToTree(PortIdentity(0))) + node.set("fromPortId", objectMapper.valueToTree[JsonNode](PortIdentity(0))) node.put("toOpId", "op-B") - node.set[ObjectNode]("toPortId", objectMapper.valueToTree(PortIdentity(1))) + node.set("toPortId", objectMapper.valueToTree[JsonNode](PortIdentity(1))) val link = objectMapper.treeToValue(node, classOf[LogicalLink]) assert(link.fromOpId == OperatorIdentity("op-A")) assert(link.toOpId == OperatorIdentity("op-B")) @@ -169,7 +169,7 @@ class LogicalLinkSpec extends AnyFlatSpec { OperatorIdentity("op-B"), PortIdentity(1) ) - val tree = objectMapper.valueToTree[com.fasterxml.jackson.databind.JsonNode](link) + val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromOpId")) assert(tree.has("toOpId")) assert(tree.has("fromPortId")) @@ -193,10 +193,14 @@ class LogicalLinkSpec extends AnyFlatSpec { PortIdentity(1) ) val json = objectMapper.writeValueAsString(original) - // Confirm writeValueAsString emits the object form. - assert(json.contains("\"fromOpId\":{\"id\":\"op-A\"}")) - // Re-reading fails because the @JsonCreator string overload can't - // accept an object for fromOpId. + // Parse the emitted JSON and confirm the structural shape — fromOpId + // is an object with an `id` field of "op-A". Avoids depending on + // exact key ordering or escaping. + val tree = objectMapper.readTree(json) + assert(tree.path("fromOpId").isObject, s"expected fromOpId to be an object: $json") + assert(tree.path("fromOpId").path("id").asText() == "op-A") + // Re-reading the just-emitted JSON fails because the @JsonCreator + // String overload can't accept the object-shape fromOpId. intercept[MismatchedInputException] { objectMapper.readValue(json, classOf[LogicalLink]) } @@ -210,8 +214,8 @@ class LogicalLinkSpec extends AnyFlatSpec { // time should fail this on purpose. val empty = objectMapper.createObjectNode() val link = objectMapper.treeToValue(empty, classOf[LogicalLink]) - assert(link.fromOpId == OperatorIdentity(null.asInstanceOf[String])) - assert(link.toOpId == OperatorIdentity(null.asInstanceOf[String])) + assert(link.fromOpId.id == null) + assert(link.toOpId.id == null) // PortIdentity fields default to null too (the @JsonCreator does not // supply scalapb defaults for missing object fields — Jackson passes // null for the typed PortIdentity arg). From e1f706fab23e977ac3d3b3200c9b528fe390545b Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 16:18:15 -0700 Subject: [PATCH 3/3] test(amber): tighten @JsonProperty pin + separate parameter-name pin Address Copilot follow-up on #4956: - The @JsonProperty pin previously claimed "all four fields" but only fromOpId/toOpId actually carry the annotation. Split into two tests: one pinning the @JsonProperty-annotated op-id keys, and a separate one explicitly pinning that fromPortId/toPortId derive from Scala parameter names (no annotation). The second test makes the parameter-rename failure mode visible: a parameter rename without an accompanying @JsonProperty annotation would silently break saved workflows. The "writeValueAsString does NOT round-trip" thread is reconciled by updating the linked issue (#4955) body to clarify that the spec characterizes the current asymmetry rather than asserting a passing round-trip; the follow-up fix that adds a custom JsonDeserialize for fromOpId/toOpId would flip the characterization-pin into a real round-trip. --- .../texera/workflow/LogicalLinkSpec.scala | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index c860456e6e2..b81f1a89fad 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -158,11 +158,11 @@ class LogicalLinkSpec extends AnyFlatSpec { assert(link.toPortId == PortIdentity(1)) } - it should "use the documented `fromOpId` / `toOpId` JSON field names on serialization" in { - // The `@JsonProperty` annotations pin the on-the-wire key names, which - // saved workflow files depend on. A renamed Scala field would - // silently break a project's existing JSON if these annotations were - // removed. + it should "emit `fromOpId` / `toOpId` JSON keys pinned by @JsonProperty annotations" in { + // Only `fromOpId` / `toOpId` carry `@JsonProperty` in `LogicalLink`; + // a Scala-side rename of either parameter would still keep the + // JSON key stable, which is the saved-workflow contract these + // annotations pin. val link = LogicalLink( OperatorIdentity("op-A"), PortIdentity(0), @@ -172,6 +172,21 @@ class LogicalLinkSpec extends AnyFlatSpec { val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromOpId")) assert(tree.has("toOpId")) + } + + it should "emit `fromPortId` / `toPortId` JSON keys derived from Scala parameter names (no @JsonProperty)" in { + // Pin: the port-id JSON keys come from Scala parameter names since + // there is no `@JsonProperty` annotation on those fields. A + // parameter rename WOULD silently break saved-workflow compatibility + // for these keys — pin so a future rename without an accompanying + // `@JsonProperty` annotation breaks this on purpose. + val link = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromPortId")) assert(tree.has("toPortId")) }