Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 7 additions & 27 deletions maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ import kotlinx.coroutines.yield
import maestro.Driver
import maestro.ElementFilter
import maestro.Filters
import com.github.romankh3.image.comparison.ImageComparison
import com.github.romankh3.image.comparison.model.ImageComparisonState
import io.grpc.Status
import maestro.*
import maestro.Filters.asFilter
Expand Down Expand Up @@ -544,7 +542,6 @@ class Orchestra(

private suspend fun assertScreenshotCommand(command: AssertScreenshotCommand): Boolean {
val path = normalizeScreenshotPath(command.path)
val thresholdDifferencePercentage = (100 - command.thresholdPercentage)

val candidates = buildList {
command.flowPath?.let { add(it.resolve(path).toFile()) }
Expand Down Expand Up @@ -600,35 +597,18 @@ class Orchestra(
val diffFileName = "${baseName}_diff.png"
val diffFile = expectedFile.parentFile?.resolve(diffFileName) ?: File(diffFileName)

val comparison =
ImageComparison(expectedImage, actualImage, diffFile)

comparison.apply {
allowingPercentOfDifferentPixels = thresholdDifferencePercentage
rectangleLineWidth = 10
pixelToleranceLevel = 0.1
minimalRectangleSize = 40
}

val comparisonState = comparison.compareImages()

when (comparisonState.imageComparisonState) {
ImageComparisonState.MATCH -> return true
ImageComparisonState.SIZE_MISMATCH -> throw MaestroException.AssertionFailure(
message = "Screenshot size mismatch: ${command.description()} - expected ${expectedImage.width}x${expectedImage.height}, actual ${actualImage.width}x${actualImage.height}. Screenshots must have the same dimensions to compare.",
when (val result = ScreenshotMatch.compare(expectedImage, actualImage, command.thresholdPercentage, diffFile)) {
is ScreenshotMatch.Result.Match -> return true
is ScreenshotMatch.Result.SizeMismatch -> throw MaestroException.AssertionFailure(
message = "Screenshot size mismatch: ${command.description()} - expected ${result.expectedWidth}x${result.expectedHeight}, actual ${result.actualWidth}x${result.actualHeight}. Screenshots must have the same dimensions to compare.",
hierarchyRoot = maestro.viewHierarchy().root,
debugMessage = "The assertScreenshot command requires the actual screenshot to have the same dimensions as the reference. Expected: ${expectedImage.width}x${expectedImage.height}, got: ${actualImage.width}x${actualImage.height}. Use the same device/emulator or cropOn to align dimensions."
debugMessage = "The assertScreenshot command requires the actual screenshot to have the same dimensions as the reference. Expected: ${result.expectedWidth}x${result.expectedHeight}, got: ${result.actualWidth}x${result.actualHeight}. Use the same device/emulator or cropOn to align dimensions."
)
ImageComparisonState.MISMATCH -> throw MaestroException.AssertionFailure(
message = "Comparison error: ${command.description()} - threshold not met, current: ${100 - comparisonState.differencePercent}%",
is ScreenshotMatch.Result.Mismatch -> throw MaestroException.AssertionFailure(
message = "Comparison error: ${command.description()} - threshold not met, current: ${result.matchPercent}%",
hierarchyRoot = maestro.viewHierarchy().root,
debugMessage = "Screenshot comparison failed. Check the diff image at ${diffFile.absolutePath} to see the differences. Adjust the thresholdPercentage if the differences are acceptable."
)
else -> throw MaestroException.AssertionFailure(
message = "Screenshot comparison failed: ${command.description()} - unexpected comparison state ${comparisonState.imageComparisonState}.",
hierarchyRoot = maestro.viewHierarchy().root,
debugMessage = "The assertScreenshot command encountered an unexpected result from the image comparison. State: ${comparisonState.imageComparisonState}"
)
}
}

Expand Down
113 changes: 113 additions & 0 deletions maestro-orchestra/src/main/java/maestro/orchestra/ScreenshotMatch.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package maestro.orchestra

import com.github.romankh3.image.comparison.ImageComparison
import com.github.romankh3.image.comparison.model.ImageComparisonState
import java.awt.image.BufferedImage
import java.io.File
import kotlin.math.pow
import kotlin.math.sqrt

/**
 * Single source of truth for screenshot comparison: owns both the
 * pass/fail decision *and* the "current %" that gets reported back to the
 * user. Keeping these in one place prevents the historical bug where the
 * threshold was evaluated against one metric (count of pixels exceeding
 * `pixelToleranceLevel`) while the message reported a different one
 * (`ImageComparisonResult.differencePercent`, an average RGB intensity).
 *
 * The underlying library is still used to draw the diff PNG, but it never
 * decides whether the screenshot matches.
 */
internal object ScreenshotMatch {

    const val DEFAULT_PIXEL_TOLERANCE: Double = 0.1

    /** Line width, in pixels, of the rectangles drawn on the diff overlay. */
    private const val DIFF_RECTANGLE_LINE_WIDTH = 10

    /** Preferred minimum size of a differing region for it to be outlined on the diff overlay. */
    private const val DIFF_MINIMAL_RECTANGLE_SIZE = 40

    /** Outcome of [compare]. */
    sealed class Result {
        /** The images have equal dimensions and [matchPercent] met the threshold. */
        data class Match(val matchPercent: Double) : Result()

        /** The images have equal dimensions but [matchPercent] is below the threshold. */
        data class Mismatch(val matchPercent: Double) : Result()

        /** The images have different dimensions, so no pixel comparison was attempted. */
        data class SizeMismatch(
            val expectedWidth: Int,
            val expectedHeight: Int,
            val actualWidth: Int,
            val actualHeight: Int,
        ) : Result()
    }

    /**
     * Compare [actual] against [expected] and decide if it meets [thresholdPercentage].
     *
     * The decision is `matchPercentage(...) >= thresholdPercentage`; images of different
     * dimensions short-circuit to [Result.SizeMismatch] without touching [diffFile].
     *
     * On [Result.Mismatch], a rectangles-overlay PNG is written to [diffFile] as a side
     * effect, provided at least one pixel differs beyond [pixelToleranceLevel].
     *
     * @param expected reference image.
     * @param actual image under test.
     * @param thresholdPercentage minimum percentage (0..100) of matching pixels required to pass.
     * @param diffFile destination of the diff overlay; only written on mismatch.
     * @param pixelToleranceLevel per-pixel colour tolerance, see [matchPercentage].
     */
    fun compare(
        expected: BufferedImage,
        actual: BufferedImage,
        thresholdPercentage: Double,
        diffFile: File,
        pixelToleranceLevel: Double = DEFAULT_PIXEL_TOLERANCE,
    ): Result {
        if (expected.width != actual.width || expected.height != actual.height) {
            return Result.SizeMismatch(
                expectedWidth = expected.width,
                expectedHeight = expected.height,
                actualWidth = actual.width,
                actualHeight = actual.height,
            )
        }

        val matchPct = matchPercentage(expected, actual, pixelToleranceLevel)
        if (matchPct >= thresholdPercentage) {
            return Result.Match(matchPct)
        }

        renderDiff(expected, actual, diffFile, pixelToleranceLevel)
        return Result.Mismatch(matchPct)
    }

    /**
     * Percentage of pixels in [actual] that match [expected] within [pixelToleranceLevel],
     * using the same Euclidean color-distance rule as
     * `com.github.romankh3.image.comparison.ImageComparison`.
     *
     * Two pixels match when their packed ARGB values are identical, or when
     * [pixelToleranceLevel] is non-zero and the squared RGB distance does not exceed
     * `(pixelToleranceLevel * sqrt(3 * 255^2))^2`. With a tolerance of exactly `0.0` any
     * difference in the packed value counts as a differing pixel.
     *
     * @return a value in 0..100; `100.0` for images with no pixels.
     * @throws IllegalArgumentException if the images do not have the same dimensions.
     */
    fun matchPercentage(
        expected: BufferedImage,
        actual: BufferedImage,
        pixelToleranceLevel: Double = DEFAULT_PIXEL_TOLERANCE,
    ): Double {
        require(expected.width == actual.width && expected.height == actual.height) {
            "matchPercentage requires images of the same size: " +
                "expected=${expected.width}x${expected.height}, " +
                "actual=${actual.width}x${actual.height}"
        }
        val width = expected.width
        val height = expected.height
        val totalPixels = width.toLong() * height.toLong()
        if (totalPixels == 0L) return 100.0

        val differenceConstant = (pixelToleranceLevel * sqrt(255.0 * 255.0 * 3)).pow(2)
        val exactMatchOnly = pixelToleranceLevel == 0.0

        // Read one scanline at a time: the bulk getRGB overload yields the same packed ARGB
        // values as getRGB(x, y) without paying the per-pixel call overhead.
        val expectedRow = IntArray(width)
        val actualRow = IntArray(width)
        var differing = 0L
        for (y in 0 until height) {
            expected.getRGB(0, y, width, 1, expectedRow, 0, width)
            actual.getRGB(0, y, width, 1, actualRow, 0, width)
            for (x in 0 until width) {
                val e = expectedRow[x]
                val a = actualRow[x]
                if (e == a) continue
                if (exactMatchOnly) {
                    differing++
                    continue
                }
                val dr = ((a shr 16) and 0xff) - ((e shr 16) and 0xff)
                val dg = ((a shr 8) and 0xff) - ((e shr 8) and 0xff)
                val db = (a and 0xff) - (e and 0xff)
                val sqDist = (dr * dr + dg * dg + db * db).toDouble()
                if (sqDist > differenceConstant) differing++
            }
        }
        return 100.0 - (differing.toDouble() / totalPixels) * 100.0
    }

    /**
     * Writes the rectangles overlay for an already-decided mismatch to [diffFile].
     *
     * The first pass only outlines regions of at least [DIFF_MINIMAL_RECTANGLE_SIZE]. When
     * every differing region is smaller than that, the library has nothing to draw, reports
     * MATCH and skips writing the file, so a second pass outlines every region to make sure
     * the mismatch still comes with a diff image.
     */
    private fun renderDiff(
        expected: BufferedImage,
        actual: BufferedImage,
        diffFile: File,
        pixelToleranceLevel: Double,
    ) {
        val preferredPass = drawDiff(expected, actual, diffFile, pixelToleranceLevel, DIFF_MINIMAL_RECTANGLE_SIZE)
        if (preferredPass == ImageComparisonState.MATCH) {
            drawDiff(expected, actual, diffFile, pixelToleranceLevel, minimalRectangleSize = 1)
        }
    }

    /**
     * Runs `ImageComparison` purely as a diff renderer and returns the state it reports.
     *
     * The pass/fail decision has already been made by the caller, so the library's own gate
     * is opened completely (`allowingPercentOfDifferentPixels = 0.0`): any differing pixel is
     * eligible for drawing, and floating-point rounding of `100 - threshold` can never make
     * the library disagree with our decision and suppress the overlay.
     */
    private fun drawDiff(
        expected: BufferedImage,
        actual: BufferedImage,
        diffFile: File,
        pixelToleranceLevel: Double,
        minimalRectangleSize: Int,
    ): ImageComparisonState =
        ImageComparison(expected, actual, diffFile).apply {
            allowingPercentOfDifferentPixels = 0.0
            this.pixelToleranceLevel = pixelToleranceLevel
            rectangleLineWidth = DIFF_RECTANGLE_LINE_WIDTH
            this.minimalRectangleSize = minimalRectangleSize
        }.compareImages().imageComparisonState
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package maestro.orchestra

import com.github.romankh3.image.comparison.ImageComparison
import com.github.romankh3.image.comparison.model.ImageComparisonState
import com.google.common.truth.Truth.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.io.TempDir
import java.awt.image.BufferedImage
import java.io.File
import java.nio.file.Path
import javax.imageio.ImageIO

/**
 * Coverage for [ScreenshotMatch] — the single source of truth that backs the
 * assertScreenshot command. The same function decides pass/fail *and* produces
 * the "current %" reported in the failure message, so the threshold check and
 * the user-visible number can never drift.
 */
class AssertScreenshotMatchTest {

    @TempDir
    lateinit var tempDir: Path

    /**
     * Loads a fixture image from the `/AssertScreenshotMatchTest/` classpath folder.
     *
     * Fails with a descriptive message when the resource is missing or cannot be decoded,
     * and closes the stream itself because `ImageIO.read` leaves it open.
     */
    private fun loadResource(name: String): BufferedImage {
        val resourcePath = "/AssertScreenshotMatchTest/$name"
        val stream = requireNotNull(javaClass.getResourceAsStream(resourcePath)) {
            "Test resource not found on classpath: $resourcePath"
        }
        return stream.use { input ->
            requireNotNull(ImageIO.read(input)) {
                "Test resource is not a readable image: $resourcePath"
            }
        }
    }

    /** Location inside the per-test temp directory where the diff overlay may be written. */
    private fun diffFile(name: String = "diff.png"): File = tempDir.resolve(name).toFile()

    @Test
    fun `compare returns Match when match percent meets threshold`() {
        val expected = loadResource("expected.png")
        val actual = loadResource("actual.png")

        val result = ScreenshotMatch.compare(
            expected = expected,
            actual = actual,
            thresholdPercentage = 95.0,
            diffFile = diffFile(),
        )

        assertThat(result).isInstanceOf(ScreenshotMatch.Result.Match::class.java)
        assertThat((result as ScreenshotMatch.Result.Match).matchPercent).isWithin(0.01).of(99.9511)
    }

    @Test
    fun `compare returns Mismatch when match percent falls below threshold and writes diff file`() {
        val expected = loadResource("expected.png")
        val actual = loadResource("actual.png")
        val diff = diffFile()

        val result = ScreenshotMatch.compare(
            expected = expected,
            actual = actual,
            thresholdPercentage = 99.99,
            diffFile = diff,
        )

        assertThat(result).isInstanceOf(ScreenshotMatch.Result.Mismatch::class.java)
        assertThat((result as ScreenshotMatch.Result.Mismatch).matchPercent).isWithin(0.01).of(99.9511)
        assertThat(diff.exists()).isTrue()
    }

    @Test
    fun `compare returns Mismatch for visually distinct screens of the same size`() {
        val expected = loadResource("actual.png")
        val actual = loadResource("saved_without_search.png")
        val diff = diffFile()

        val result = ScreenshotMatch.compare(
            expected = expected,
            actual = actual,
            thresholdPercentage = 95.0,
            diffFile = diff,
        )

        val mismatch = result as ScreenshotMatch.Result.Mismatch
        assertThat(mismatch.matchPercent).isWithin(0.01).of(91.4724)
        assertThat(diff.exists()).isTrue()
    }

    @Test
    fun `compare returns SizeMismatch for differently sized images`() {
        val expected = loadResource("expected.png")
        val actual = BufferedImage(100, 100, BufferedImage.TYPE_INT_RGB)

        val result = ScreenshotMatch.compare(
            expected = expected,
            actual = actual,
            thresholdPercentage = 95.0,
            diffFile = diffFile(),
        )

        val sizeMismatch = result as ScreenshotMatch.Result.SizeMismatch
        assertThat(sizeMismatch.expectedWidth).isEqualTo(expected.width)
        assertThat(sizeMismatch.expectedHeight).isEqualTo(expected.height)
        assertThat(sizeMismatch.actualWidth).isEqualTo(100)
        assertThat(sizeMismatch.actualHeight).isEqualTo(100)
    }

    /**
     * Pins our pixel-walk to the upstream library at the threshold boundary.
     * If the library's `pixelToleranceLevel` semantics ever change, the
     * library's MATCH/MISMATCH decision will diverge from ours and this test
     * will catch it.
     */
    @Test
    fun `match percentage agrees with library MATCH MISMATCH decision at the boundary`() {
        val expected = loadResource("expected.png")
        val actual = loadResource("actual.png")

        for (pixelToleranceLevel in listOf(0.0, 0.05, 0.1)) {
            val ourMatchPct = ScreenshotMatch.matchPercentage(expected, actual, pixelToleranceLevel)
            val ourDiffPct = 100.0 - ourMatchPct

            // Probe slightly either side of our own figure so floating-point rounding at the
            // exact boundary cannot make the assertion flaky.
            val justAbove = libraryDecision(expected, actual, pixelToleranceLevel, ourDiffPct + 0.001)
            val justBelow = libraryDecision(expected, actual, pixelToleranceLevel, ourDiffPct - 0.001)

            assertThat(justAbove).isEqualTo(ImageComparisonState.MATCH)
            assertThat(justBelow).isEqualTo(ImageComparisonState.MISMATCH)
        }
    }

    @Test
    fun `match percentage exposes count-based metric distinct from library's average color delta`() {
        val expected = loadResource("expected.png")
        val actual = loadResource("actual.png")

        // At pixelToleranceLevel=0.0, ~0.5749% of pixels differ → match% ≈ 99.4251%.
        val ourMatchPct = ScreenshotMatch.matchPercentage(expected, actual, pixelToleranceLevel = 0.0)
        assertThat(ourMatchPct).isWithin(0.01).of(99.4251)

        // The library's `differencePercent` (avg per-channel RGB delta) is much smaller (~0.0366%)
        // and is NOT a meaningful "current %" against the configured threshold. This is the bug
        // we removed by owning the metric ourselves.
        val cmp = ImageComparison(expected, actual)
            .setAllowingPercentOfDifferentPixels(0.1)
            .setPixelToleranceLevel(0.0)
        val libraryResult = cmp.compareImages()
        assertThat(libraryResult.imageComparisonState).isEqualTo(ImageComparisonState.MISMATCH)
        val misleadingMatchPct = 100.0 - libraryResult.differencePercent
        assertThat(misleadingMatchPct).isGreaterThan(ourMatchPct + 0.5)
    }

    /**
     * Asks the upstream library for its own MATCH/MISMATCH verdict using the given tolerance
     * and allowed percentage of differing pixels, without writing a diff file.
     */
    private fun libraryDecision(
        expected: BufferedImage,
        actual: BufferedImage,
        pixelToleranceLevel: Double,
        allowingPercentOfDifferentPixels: Double,
    ): ImageComparisonState {
        return ImageComparison(expected, actual)
            .setAllowingPercentOfDifferentPixels(allowingPercentOfDifferentPixels)
            .setPixelToleranceLevel(pixelToleranceLevel)
            .compareImages()
            .imageComparisonState
    }

}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading