From d9a60619679e7918a71f12975a732c4975a8087c Mon Sep 17 00:00:00 2001
From: Minh Vu
Date: Sat, 27 Jun 2026 16:47:53 +0200
Subject: [PATCH 1/2] fix(table): keep version hint URIs intact in orphan cleanup

---
 table/orphan_cleanup.go      | 12 +++++++++++-
 table/orphan_cleanup_test.go | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/table/orphan_cleanup.go b/table/orphan_cleanup.go
index 49d323e1d..e6f5cdc4d 100644
--- a/table/orphan_cleanup.go
+++ b/table/orphan_cleanup.go
@@ -304,7 +304,7 @@ func (t Table) getReferencedFiles(ctx context.Context, fs iceio.IO, maxConcurren
 
 	// Add version hint file (for Hadoop-style tables)
 	// Following Java's ReachableFileUtil.versionHintLocation() logic:
-	versionHintPath := filepath.Join(metadata.Location(), "metadata", "version-hint.text")
+	versionHintPath := versionHintLocation(metadata.Location())
 	referenced[normalizeFilePath(versionHintPath)] = false
 
 	for sf := range metadata.Statistics() {
@@ -597,6 +597,16 @@ func normalizeFilePathWithConfig(path string, cfg *orphanCleanupConfig) string {
 	return normalizeNonURLPath(path)
 }
 
+func versionHintLocation(tableLocation string) string {
+	if strings.Contains(tableLocation, "://") || strings.HasPrefix(tableLocation, "file:") {
+		if joined, err := url.JoinPath(tableLocation, "metadata", "version-hint.text"); err == nil {
+			return joined
+		}
+	}
+
+	return filepath.Join(tableLocation, "metadata", "version-hint.text")
+}
+
 // normalizeURLPath normalizes URL-based file paths with scheme/authority equivalence.
 //
 // This function handles the complexities of cloud storage URIs where the same file
diff --git a/table/orphan_cleanup_test.go b/table/orphan_cleanup_test.go
index 1bf6a7a5d..7957461a7 100644
--- a/table/orphan_cleanup_test.go
+++ b/table/orphan_cleanup_test.go
@@ -22,6 +22,7 @@ import (
 	"errors"
 	"fmt"
 	stdfs "io/fs"
+	"path/filepath"
 	"strings"
 	"testing"
 	"time"
@@ -157,6 +158,36 @@ func TestNormalizeNonURLPath(t *testing.T) {
 	}
 }
 
+func TestVersionHintLocation(t *testing.T) {
+	tests := []struct {
+		name     string
+		location string
+		expected string
+	}{
+		{
+			name:     "s3_uri",
+			location: "s3://bucket/table",
+			expected: "s3://bucket/table/metadata/version-hint.text",
+		},
+		{
+			name:     "file_uri",
+			location: "file:///tmp/table",
+			expected: "file:///tmp/table/metadata/version-hint.text",
+		},
+		{
+			name:     "local_path",
+			location: filepath.Join("local", "table"),
+			expected: filepath.Join("local", "table", "metadata", "version-hint.text"),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, versionHintLocation(tt.location))
+		})
+	}
+}
+
 func TestApplySchemeEquivalence(t *testing.T) {
 	equalSchemes := map[string]string{
 		"s3,s3a,s3n": "s3",
@@ -532,6 +563,8 @@ func TestGetReferencedFiles_IncludesStatisticsFiles(t *testing.T) {
 	assert.Contains(t, refs, normalizeFilePath("s3://bucket/stats/table-stats.puffin"))
 	assert.Contains(t, refs, normalizeFilePath("s3://bucket/stats/part-stats.puffin"))
 	assert.Contains(t, refs, normalizeFilePath(tbl.metadataLocation))
+	assert.Contains(t, refs, normalizeFilePath("s3://bucket/test/location/metadata/version-hint.text"))
+	assert.NotContains(t, refs, normalizeFilePath("s3:/bucket/test/location/metadata/version-hint.text"))
 	assert.NotContains(t, refs, normalizeFilePath("s3://bucket/stats/not-referenced.puffin"))
 	assert.NotContains(t, refs, "")
 }

From 9bcc275a6d6b49e075b4abbacac1d489aeac29e3 Mon Sep 17 00:00:00 2001
From: Minh Vu
Date: Sat, 27 Jun 2026 21:51:41 +0200
Subject: [PATCH 2/2] test(table): cover file URI version hint path

---
 table/orphan_cleanup_test.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/table/orphan_cleanup_test.go b/table/orphan_cleanup_test.go
index 7957461a7..1bea46804 100644
--- a/table/orphan_cleanup_test.go
+++ b/table/orphan_cleanup_test.go
@@ -174,6 +174,11 @@ func TestVersionHintLocation(t *testing.T) {
 			location: "file:///tmp/table",
 			expected: "file:///tmp/table/metadata/version-hint.text",
 		},
+		{
+			name:     "file_uri_opaque",
+			location: "file:/tmp/table",
+			expected: "file:/tmp/table/metadata/version-hint.text",
+		},
 		{
 			name:     "local_path",
 			location: filepath.Join("local", "table"),