From 93415f4e1e6bacc0ae8467518d7334fbbfad4c63 Mon Sep 17 00:00:00 2001 From: Levi Jiang Date: Mon, 15 Jun 2026 17:15:32 -0700 Subject: [PATCH] fix(retention): preserve scheme/authority when deriving backup manifest paths Retention backup-manifest writing used java.nio.file.Paths to derive the parent directory of a data file. Paths is not URI-aware: it collapses hdfs://authority/... into hdfs:/authority/..., dropping the authority. The mangled path then resolved against the default filesystem root and failed with "Permission denied ... inode=/". This bit tables whose newer data files are stored as fully-qualified hdfs:// URIs while older ones are bare paths. Use Hadoop Path (which preserves scheme + authority) via a testable dataFileParent() helper. OperationsBackupPathTest covers the qualified and bare path forms, plus a negative test that exercises the java.nio.file.Paths derivation and asserts it produces the broken, root-rooted destination path. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhouse/jobs/spark/Operations.java | 18 ++- .../jobs/spark/OperationsBackupPathTest.java | 114 ++++++++++++++++++ 2 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsBackupPathTest.java diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/Operations.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/Operations.java index bd301c729..b9feb9dfb 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/Operations.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/Operations.java @@ -14,7 +14,6 @@ import com.linkedin.openhouse.jobs.util.SparkJobUtil; import com.linkedin.openhouse.jobs.util.TableStatsCollector; import java.io.IOException; -import java.nio.file.Paths; import java.time.ZonedDateTime; import java.time.temporal.ChronoUnit; import java.util.Collections; @@ -356,7 +355,7 @@ private Map> prepareBackupDataManifests( Collectors.groupingBy( task -> { String path = task.file().path().toString(); - return Paths.get(path).getParent().toString(); + return dataFileParent(path); }, Collectors.mapping(task -> task.file().path().toString(), Collectors.toList()))); } catch (IOException e) { @@ -403,7 +402,20 @@ private void exposeBackupLocation(Table table, String backupDir) { table.name(), AppConstants.BACKUP_DIR_KEY, fullyQualifiedBackupDir)); } - private Path getTrashPath(String path, String filePath, String trashDir) { + /** + * Return the parent directory of a data file path. Uses Hadoop {@link Path} (not {@link + * java.nio.file.Paths}, which is not URI-aware) so that the scheme and authority of a + * fully-qualified path such as {@code hdfs://cluster/a/b/f.orc} are preserved. Dropping the + * authority here would later resolve the backup manifest against the default filesystem root and + * fail with a permission error. + */ + @VisibleForTesting + static String dataFileParent(String dataFilePath) { + return new Path(dataFilePath).getParent().toString(); + } + + @VisibleForTesting + static Path getTrashPath(String path, String filePath, String trashDir) { return new Path(filePath.replace(path, new Path(path, trashDir).toString())); } diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsBackupPathTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsBackupPathTest.java new file mode 100644 index 000000000..3255a0a01 --- /dev/null +++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsBackupPathTest.java @@ -0,0 +1,114 @@ +package com.linkedin.openhouse.jobs.spark; + +import java.nio.file.Paths; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Pure-Java unit tests for the backup-manifest path derivation in {@link Operations}. No Spark + * session, no filesystem — exercises the scheme/authority handling that broke retention backups + * when data files are stored as fully-qualified {@code hdfs://authority/...} URIs. + * + *

These guard against a regression where {@code java.nio.file.Paths} was used to derive parent + * paths: it is not URI-aware and collapses {@code hdfs://authority} into {@code hdfs:/authority}, + * dropping the authority. The mangled path later resolves against the default filesystem root and + * fails with {@code Permission denied ... inode="/"}. + */ +public class OperationsBackupPathTest { + + @Test + public void dataFileParentPreservesSchemeAndAuthority() { + String dataFile = + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-26-00/f.orc"; + + String parent = Operations.dataFileParent(dataFile); + + // Authority (//ltx1-holdem) must be retained; the old Paths.get path produced + // "hdfs:/ltx1-holdem/..." (single slash, authority dropped). + Assertions.assertEquals( + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-26-00", parent); + } + + @Test + public void dataFileParentPreservesBarePath() { + String dataFile = "/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-06-00/f.orc"; + + Assertions.assertEquals( + "/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-06-00", + Operations.dataFileParent(dataFile)); + } + + @Test + public void manifestDestPathForQualifiedDataFileKeepsAuthorityAndInsertsBackupDir() { + // Mirrors the production chain in prepareBackupDataManifests + writeBackupDataManifests. + // Iceberg stores table.location() bare (no scheme), but newer data files are fully qualified. + String tableLocation = "/data/openhouse/db/tbl-uuid"; + String dataFile = + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-26-00/f.orc"; + + String partitionPath = Operations.dataFileParent(dataFile); + String manifestSource = new Path(partitionPath, "data_manifest_1.json").toString(); + Path dest = Operations.getTrashPath(tableLocation, manifestSource, ".backup"); + + Assertions.assertEquals( + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/.backup/data/" + + "datepartition=2026-03-26-00/data_manifest_1.json", + dest.toString()); + } + + @Test + public void manifestDestPathForBareDataFileInsertsBackupDir() { + String tableLocation = "/data/openhouse/db/tbl-uuid"; + String dataFile = "/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-06-00/f.orc"; + + String partitionPath = Operations.dataFileParent(dataFile); + String manifestSource = new Path(partitionPath, "data_manifest_1.json").toString(); + Path dest = Operations.getTrashPath(tableLocation, manifestSource, ".backup"); + + Assertions.assertEquals( + "/data/openhouse/db/tbl-uuid/.backup/data/datepartition=2026-03-06-00/data_manifest_1.json", + dest.toString()); + } + + /** + * Negative test pinning the regression: deriving the parent with {@link java.nio.file.Paths} + * (instead of Hadoop {@link Path}) collapses the {@code //} authority separator, so the authority + * is dropped and leaks into the path as a bogus top-level directory. The resulting backup + * destination resolves under the filesystem root, which is what produced the {@code Permission + * denied ... inode="/"} failure in production. + */ + @Test + public void nioPathsDropsAuthorityAndProducesWrongDestPath() { + String tableLocation = "/data/openhouse/db/tbl-uuid"; + String dataFile = + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-26-00/f.orc"; + + // Buggy derivation: java.nio.file.Paths is not URI-aware and collapses "hdfs://" into "hdfs:/". + String nioParent = Paths.get(dataFile).getParent().toString(); + Assertions.assertEquals( + "hdfs:/ltx1-holdem/data/openhouse/db/tbl-uuid/data/datepartition=2026-03-26-00", + nioParent, + "java.nio.file.Paths drops the // authority separator"); + Assertions.assertNotEquals( + Operations.dataFileParent(dataFile), + nioParent, + "nio-derived parent must differ from the correct Hadoop Path parent"); + + String manifestSource = new Path(nioParent, "data_manifest_1.json").toString(); + Path wrongDest = Operations.getTrashPath(tableLocation, manifestSource, ".backup"); + + // The authority is gone and has leaked into the path, so the file would be created under the + // filesystem root ("/ltx1-holdem/...") instead of under the table location. + Assertions.assertNull( + wrongDest.toUri().getAuthority(), "authority is lost in the nio-derived path"); + Assertions.assertTrue( + wrongDest.toUri().getPath().startsWith("/ltx1-holdem/"), + "authority leaks into the path, rooting it at the filesystem root"); + Assertions.assertNotEquals( + "hdfs://ltx1-holdem/data/openhouse/db/tbl-uuid/.backup/data/" + + "datepartition=2026-03-26-00/data_manifest_1.json", + wrongDest.toString(), + "nio-derived destination differs from the correct backup path"); + } +}