diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
index ed17181726..a3c966cae2 100644
--- a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
+++ b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
@@ -26,8 +26,6 @@ import kotlinx.coroutines.yield
 import maestro.Driver
 import maestro.ElementFilter
 import maestro.Filters
-import com.github.romankh3.image.comparison.ImageComparison
-import com.github.romankh3.image.comparison.model.ImageComparisonState
 import io.grpc.Status
 import maestro.*
 import maestro.Filters.asFilter
@@ -544,7 +542,6 @@ class Orchestra(
 
     private suspend fun assertScreenshotCommand(command: AssertScreenshotCommand): Boolean {
         val path = normalizeScreenshotPath(command.path)
-        val thresholdDifferencePercentage = (100 - command.thresholdPercentage)
 
         val candidates = buildList {
             command.flowPath?.let { add(it.resolve(path).toFile()) }
@@ -600,35 +597,18 @@ class Orchestra(
         val diffFileName = "${baseName}_diff.png"
         val diffFile = expectedFile.parentFile?.resolve(diffFileName) ?: File(diffFileName)
 
-        val comparison =
-            ImageComparison(expectedImage, actualImage, diffFile)
-
-        comparison.apply {
-            allowingPercentOfDifferentPixels = thresholdDifferencePercentage
-            rectangleLineWidth = 10
-            pixelToleranceLevel = 0.1
-            minimalRectangleSize = 40
-        }
-
-        val comparisonState = comparison.compareImages()
-
-        when (comparisonState.imageComparisonState) {
-            ImageComparisonState.MATCH -> return true
-            ImageComparisonState.SIZE_MISMATCH -> throw MaestroException.AssertionFailure(
-                message = "Screenshot size mismatch: ${command.description()} - expected ${expectedImage.width}x${expectedImage.height}, actual ${actualImage.width}x${actualImage.height}. Screenshots must have the same dimensions to compare.",
+        when (val result = ScreenshotMatch.compare(expectedImage, actualImage, command.thresholdPercentage, diffFile)) {
+            is ScreenshotMatch.Result.Match -> return true
+            is ScreenshotMatch.Result.SizeMismatch -> throw MaestroException.AssertionFailure(
+                message = "Screenshot size mismatch: ${command.description()} - expected ${result.expectedWidth}x${result.expectedHeight}, actual ${result.actualWidth}x${result.actualHeight}. Screenshots must have the same dimensions to compare.",
                 hierarchyRoot = maestro.viewHierarchy().root,
-                debugMessage = "The assertScreenshot command requires the actual screenshot to have the same dimensions as the reference. Expected: ${expectedImage.width}x${expectedImage.height}, got: ${actualImage.width}x${actualImage.height}. Use the same device/emulator or cropOn to align dimensions."
+                debugMessage = "The assertScreenshot command requires the actual screenshot to have the same dimensions as the reference. Expected: ${result.expectedWidth}x${result.expectedHeight}, got: ${result.actualWidth}x${result.actualHeight}. Use the same device/emulator or cropOn to align dimensions."
             )
-            ImageComparisonState.MISMATCH -> throw MaestroException.AssertionFailure(
-                message = "Comparison error: ${command.description()} - threshold not met, current: ${100 - comparisonState.differencePercent}%",
+            is ScreenshotMatch.Result.Mismatch -> throw MaestroException.AssertionFailure(
+                message = "Comparison error: ${command.description()} - threshold not met, current: ${result.matchPercent}%",
                 hierarchyRoot = maestro.viewHierarchy().root,
                 debugMessage = "Screenshot comparison failed. Check the diff image at ${diffFile.absolutePath} to see the differences. Adjust the thresholdPercentage if the differences are acceptable."
             )
-            else -> throw MaestroException.AssertionFailure(
-                message = "Screenshot comparison failed: ${command.description()} - unexpected comparison state ${comparisonState.imageComparisonState}.",
-                hierarchyRoot = maestro.viewHierarchy().root,
-                debugMessage = "The assertScreenshot command encountered an unexpected result from the image comparison. State: ${comparisonState.imageComparisonState}"
-            )
         }
     }
 
diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/ScreenshotMatch.kt b/maestro-orchestra/src/main/java/maestro/orchestra/ScreenshotMatch.kt
new file mode 100644
index 0000000000..c056384802
--- /dev/null
+++ b/maestro-orchestra/src/main/java/maestro/orchestra/ScreenshotMatch.kt
@@ -0,0 +1,113 @@
+package maestro.orchestra
+
+import com.github.romankh3.image.comparison.ImageComparison
+import java.awt.image.BufferedImage
+import java.io.File
+import kotlin.math.pow
+import kotlin.math.sqrt
+
+/**
+ * Single source of truth for screenshot comparison: owns both the
+ * pass/fail decision *and* the "current %" that gets reported back to the
+ * user. Keeping these in one place prevents the historical bug where the
+ * threshold was evaluated against one metric (count of pixels exceeding
+ * `pixelToleranceLevel`) while the message reported a different one
+ * (`ImageComparisonResult.differencePercent`, an average RGB intensity).
+ *
+ * The underlying library is still used to draw the diff PNG, but no longer
+ * decides whether the screenshot matches.
+ */
+internal object ScreenshotMatch {
+
+    const val DEFAULT_PIXEL_TOLERANCE: Double = 0.1 // default for the pixelToleranceLevel parameter of compare / matchPercentage
+
+    sealed class Result {
+        data class Match(val matchPercent: Double) : Result() // matchPercent >= thresholdPercentage
+        data class Mismatch(val matchPercent: Double) : Result() // matchPercent < thresholdPercentage
+        data class SizeMismatch( // width or height differs; returned before any pixel is compared
+            val expectedWidth: Int,
+            val expectedHeight: Int,
+            val actualWidth: Int,
+            val actualHeight: Int,
+        ) : Result()
+    }
+
+    /**
+     * Compare [actual] against [expected] and decide if it meets [thresholdPercentage].
+     * On [Result.Mismatch], asks the library to write a rectangles-overlay PNG to [diffFile] as a side effect.
+     */
+    fun compare(
+        expected: BufferedImage,
+        actual: BufferedImage,
+        thresholdPercentage: Double,
+        diffFile: File,
+        pixelToleranceLevel: Double = DEFAULT_PIXEL_TOLERANCE,
+    ): Result {
+        if (expected.width != actual.width || expected.height != actual.height) { // sizes must agree before any pixel walk
+            return Result.SizeMismatch(
+                expectedWidth = expected.width,
+                expectedHeight = expected.height,
+                actualWidth = actual.width,
+                actualHeight = actual.height,
+            )
+        }
+
+        val matchPct = matchPercentage(expected, actual, pixelToleranceLevel)
+        if (matchPct >= thresholdPercentage) { // the threshold is inclusive
+            return Result.Match(matchPct)
+        }
+
+        // Use ImageComparison purely as a diff renderer. The mismatch decision is already made above,
+        // so allow 0% differing pixels: passing `100 - thresholdPercentage` could round differently from
+        // our check at the boundary (or go negative for thresholds > 100) and suppress the overlay.
+        ImageComparison(expected, actual, diffFile).apply {
+            allowingPercentOfDifferentPixels = 0.0
+            this.pixelToleranceLevel = pixelToleranceLevel
+            rectangleLineWidth = 10
+            minimalRectangleSize = 40
+        }.compareImages()
+
+        return Result.Mismatch(matchPct)
+    }
+
+    /**
+     * Percentage of pixels in [actual] that match [expected] within [pixelToleranceLevel],
+     * using the same Euclidean color-distance rule as
+     * `com.github.romankh3.image.comparison.ImageComparison`.
+     */
+    fun matchPercentage(
+        expected: BufferedImage,
+        actual: BufferedImage,
+        pixelToleranceLevel: Double = DEFAULT_PIXEL_TOLERANCE,
+    ): Double {
+        require(expected.width == actual.width && expected.height == actual.height) {
+            "matchPercentage requires images of the same size: " +
+                "expected=${expected.width}x${expected.height}, " +
+                "actual=${actual.width}x${actual.height}"
+        }
+        val width = expected.width
+        val height = expected.height
+        val totalPixels = width.toLong() * height.toLong() // Long arithmetic so the pixel count cannot overflow Int
+        if (totalPixels == 0L) return 100.0 // nothing to compare: report a full match and avoid dividing by zero
+
+        val differenceConstant = (pixelToleranceLevel * sqrt(255.0 * 255.0 * 3)).pow(2) // squared cutoff: tolerance x max RGB distance sqrt(3 * 255^2)
+        var differing = 0L
+        for (y in 0 until height) {
+            for (x in 0 until width) {
+                val e = expected.getRGB(x, y)
+                val a = actual.getRGB(x, y)
+                if (e == a) continue // identical packed pixel values always match
+                if (pixelToleranceLevel == 0.0) { // zero tolerance: any difference in the packed value counts
+                    differing++
+                    continue
+                }
+                val dr = ((a shr 16) and 0xff) - ((e shr 16) and 0xff) // only bits 0-23 (R, G, B) feed the distance; bits 24-31 are ignored here
+                val dg = ((a shr 8) and 0xff) - ((e shr 8) and 0xff)
+                val db = (a and 0xff) - (e and 0xff)
+                val sqDist = (dr * dr + dg * dg + db * db).toDouble()
+                if (sqDist > differenceConstant) differing++ // strictly beyond the cutoff; a distance equal to it still matches
+            }
+        }
+        return 100.0 - (differing.toDouble() / totalPixels) * 100.0
+    }
+}
diff --git a/maestro-orchestra/src/test/java/maestro/orchestra/AssertScreenshotMatchTest.kt b/maestro-orchestra/src/test/java/maestro/orchestra/AssertScreenshotMatchTest.kt
new file mode 100644
index 0000000000..45f7b8348d
--- /dev/null
+++ b/maestro-orchestra/src/test/java/maestro/orchestra/AssertScreenshotMatchTest.kt
@@ -0,0 +1,157 @@
+package maestro.orchestra
+
+import com.github.romankh3.image.comparison.ImageComparison
+import com.github.romankh3.image.comparison.model.ImageComparisonState
+import com.google.common.truth.Truth.assertThat
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.io.TempDir
+import java.awt.image.BufferedImage
+import java.io.File
+import java.nio.file.Path
+import javax.imageio.ImageIO
+
+/**
+ * Coverage for [ScreenshotMatch] — the single source of truth that backs the
+ * assertScreenshot command. The same function decides pass/fail *and* produces
+ * the "current %" reported in the failure message, so the threshold check and
+ * the user-visible number can never drift.
+ */
+class AssertScreenshotMatchTest {
+
+    @TempDir
+    lateinit var tempDir: Path
+
+    private fun loadResource(name: String) =
+        ImageIO.read(javaClass.getResourceAsStream("/AssertScreenshotMatchTest/$name")!!) // fixtures live under src/test/resources/AssertScreenshotMatchTest
+
+    private fun diffFile(name: String = "diff.png"): File = tempDir.resolve(name).toFile()
+
+    @Test
+    fun `compare returns Match when match percent meets threshold`() {
+        val expected = loadResource("expected.png")
+        val actual = loadResource("actual.png")
+
+        val result = ScreenshotMatch.compare(
+            expected = expected,
+            actual = actual,
+            thresholdPercentage = 95.0,
+            diffFile = diffFile(),
+        )
+
+        assertThat(result).isInstanceOf(ScreenshotMatch.Result.Match::class.java)
+        assertThat((result as ScreenshotMatch.Result.Match).matchPercent).isWithin(0.01).of(99.9511)
+    }
+
+    @Test
+    fun `compare returns Mismatch when match percent falls below threshold and writes diff file`() {
+        val expected = loadResource("expected.png")
+        val actual = loadResource("actual.png")
+        val diff = diffFile()
+
+        val result = ScreenshotMatch.compare(
+            expected = expected,
+            actual = actual,
+            thresholdPercentage = 99.99, // same image pair as the Match test; only the threshold is stricter
+            diffFile = diff,
+        )
+
+        assertThat(result).isInstanceOf(ScreenshotMatch.Result.Mismatch::class.java)
+        assertThat((result as ScreenshotMatch.Result.Mismatch).matchPercent).isWithin(0.01).of(99.9511)
+        assertThat(diff.exists()).isTrue()
+    }
+
+    @Test
+    fun `compare returns Mismatch for visually distinct screens of the same size`() {
+        val expected = loadResource("actual.png")
+        val actual = loadResource("saved_without_search.png")
+        val diff = diffFile()
+
+        val result = ScreenshotMatch.compare(
+            expected = expected,
+            actual = actual,
+            thresholdPercentage = 95.0,
+            diffFile = diff,
+        )
+
+        val mismatch = result as ScreenshotMatch.Result.Mismatch
+        assertThat(mismatch.matchPercent).isWithin(0.01).of(91.4724)
+        assertThat(diff.exists()).isTrue()
+    }
+
+    @Test
+    fun `compare returns SizeMismatch for differently sized images`() {
+        val expected = loadResource("expected.png")
+        val actual = BufferedImage(100, 100, BufferedImage.TYPE_INT_RGB)
+
+        val result = ScreenshotMatch.compare(
+            expected = expected,
+            actual = actual,
+            thresholdPercentage = 95.0,
+            diffFile = diffFile(),
+        )
+
+        val sizeMismatch = result as ScreenshotMatch.Result.SizeMismatch
+        assertThat(sizeMismatch.expectedWidth).isEqualTo(expected.width)
+        assertThat(sizeMismatch.expectedHeight).isEqualTo(expected.height)
+        assertThat(sizeMismatch.actualWidth).isEqualTo(100)
+        assertThat(sizeMismatch.actualHeight).isEqualTo(100)
+    }
+
+    /**
+     * Pins our pixel-walk to the upstream library at the threshold boundary.
+     * If the library's `pixelToleranceLevel` semantics ever change, the
+     * library's MATCH/MISMATCH decision will diverge from ours and this test
+     * will catch it.
+     */
+    @Test
+    fun `match percentage agrees with library MATCH MISMATCH decision at the boundary`() {
+        val expected = loadResource("expected.png")
+        val actual = loadResource("actual.png")
+
+        for (pixelToleranceLevel in listOf(0.0, 0.05, 0.1)) {
+            val ourMatchPct = ScreenshotMatch.matchPercentage(expected, actual, pixelToleranceLevel)
+            val ourDiffPct = 100.0 - ourMatchPct
+
+            val justAbove = libraryDecision(expected, actual, pixelToleranceLevel, ourDiffPct + 0.001) // allowance just over our diff%
+            val justBelow = libraryDecision(expected, actual, pixelToleranceLevel, ourDiffPct - 0.001) // allowance just under our diff%
+
+            assertThat(justAbove).isEqualTo(ImageComparisonState.MATCH)
+            assertThat(justBelow).isEqualTo(ImageComparisonState.MISMATCH)
+        }
+    }
+
+    @Test
+    fun `match percentage exposes count-based metric distinct from library's average color delta`() {
+        val expected = loadResource("expected.png")
+        val actual = loadResource("actual.png")
+
+        // At pixelToleranceLevel=0.0, ~0.5749% of pixels differ → match% ≈ 99.4251%.
+        val ourMatchPct = ScreenshotMatch.matchPercentage(expected, actual, pixelToleranceLevel = 0.0)
+        assertThat(ourMatchPct).isWithin(0.01).of(99.4251)
+
+        // The library's `differencePercent` (avg per-channel RGB delta) is much smaller (~0.0366%)
+        // and is NOT a meaningful "current %" against the configured threshold. This is the bug
+        // we removed by owning the metric ourselves.
+        val cmp = ImageComparison(expected, actual)
+            .setAllowingPercentOfDifferentPixels(0.1)
+            .setPixelToleranceLevel(0.0)
+        val libraryResult = cmp.compareImages()
+        assertThat(libraryResult.imageComparisonState).isEqualTo(ImageComparisonState.MISMATCH)
+        val misleadingMatchPct = 100.0 - libraryResult.differencePercent // what the old failure message printed as "current"
+        assertThat(misleadingMatchPct).isGreaterThan(ourMatchPct + 0.5)
+    }
+
+    private fun libraryDecision(
+        expected: BufferedImage,
+        actual: BufferedImage,
+        pixelToleranceLevel: Double,
+        allowingPercentOfDifferentPixels: Double,
+    ): ImageComparisonState {
+        return ImageComparison(expected, actual) // no destination file: only the library's MATCH/MISMATCH state is read
+            .setAllowingPercentOfDifferentPixels(allowingPercentOfDifferentPixels)
+            .setPixelToleranceLevel(pixelToleranceLevel)
+            .compareImages()
+            .imageComparisonState
+    }
+
+}
diff --git a/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/actual.png b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/actual.png
new file mode 100644
index 0000000000..d1170cbe12
Binary files /dev/null and b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/actual.png differ
diff --git a/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/expected.png b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/expected.png
new file mode 100644
index 0000000000..15eb7a42ce
Binary files /dev/null and b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/expected.png differ
diff --git a/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/saved_without_search.png b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/saved_without_search.png
new file mode 100644
index 0000000000..bbf1bbc82c
Binary files /dev/null and b/maestro-orchestra/src/test/resources/AssertScreenshotMatchTest/saved_without_search.png differ