From 413202c2c04e7389e8673cd188b1cba4ef91f6b7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 13 Aug 2025 21:21:07 +0200 Subject: [PATCH 1/2] fixup! Hydrate missing loose objects in check_and_freshen() If the `read-object` hook is not found, the code currently fails with a rather obscure message: fatal: Out of memory, strdup failed The reason is that, without a check whether the hook was found, `strvec_push()` would eventually try to call `xstrdup()` with `cmd` (which is `NULL` if the hook was not found), which would fail and print that rather misleading message. Signed-off-by: Johannes Schindelin --- object-store.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/object-store.c b/object-store.c index 188768864c06b4..fc3fbdf4d3265e 100644 --- a/object-store.c +++ b/object-store.c @@ -707,6 +707,9 @@ int read_object_process(const struct object_id *oid) const char *cmd = find_hook(the_repository, "read-object"); uint64_t start; + if (!cmd) + die(_("could not find the `read-object` hook")); + start = getnanotime(); trace2_region_enter("subprocess", "read_object", the_repository); From db6f8f43a5132ca85e5cbf704a79a875672eea9a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 13 Aug 2025 12:47:31 +0200 Subject: [PATCH 2/2] gvfs: allow corrupt objects to be re-downloaded As of 9e59b38c88c (object-file: emit corruption errors when detected, 2022-12-14), Git will loudly complain about corrupt objects. That is fine, as long as the idea isn't to re-download locally-corrupted objects. But that's exactly what we want to do in VFS for Git via the `read-object` hook, as per the `GitCorruptObjectTests` code added in https://github.com/microsoft/VFSForGit/commit/2db0c030eb25 (New features: [...] - GVFS can now recover from corrupted git object files [...] , 2018-02-16). So let's support precisely that, and add a regression test that ensures that re-downloading corrupt objects via the `read-object` hook works. 
While at it, avoid the XOR operator to flip the bits, when we actually want to make sure that they are turned off: Use the AND-NOT operator for that purpose. Helped-by: Matthew John Cheetham Helped-by: Derrick Stolee Signed-off-by: Johannes Schindelin --- commit.c | 9 +++++++-- object-store.c | 4 ++++ t/t1060-object-corruption.sh | 30 ++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/commit.c b/commit.c index 8209d5303d427e..94f8f6e0d600ef 100644 --- a/commit.c +++ b/commit.c @@ -566,9 +566,14 @@ int repo_parse_commit_internal(struct repository *r, int flags = OBJECT_INFO_LOOKUP_REPLACE | OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_DIE_IF_CORRUPT; - /* But the GVFS Protocol _does_ support missing commits! */ + /* + * But the GVFS Protocol _does_ support missing commits! + * And the idea with VFS for Git is to re-download corrupted objects, + * not to fail! + */ if (gvfs_config_is_set(GVFS_MISSING_OK)) - flags ^= OBJECT_INFO_SKIP_FETCH_OBJECT; + flags &= ~(OBJECT_INFO_SKIP_FETCH_OBJECT | + OBJECT_INFO_DIE_IF_CORRUPT); if (!item) return -1; diff --git a/object-store.c b/object-store.c index fc3fbdf4d3265e..3cee72452deaf7 100644 --- a/object-store.c +++ b/object-store.c @@ -7,6 +7,7 @@ #include "dir.h" #include "environment.h" #include "gettext.h" +#include "gvfs.h" #include "gvfs-helper-client.h" #include "hex.h" #include "hook.h" @@ -1107,6 +1108,9 @@ void *repo_read_object_file(struct repository *r, unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE; void *data; + if (gvfs_config_is_set(GVFS_MISSING_OK)) + flags &= ~OBJECT_INFO_DIE_IF_CORRUPT; + oi.typep = type; oi.sizep = size; oi.contentp = &data; diff --git a/t/t1060-object-corruption.sh b/t/t1060-object-corruption.sh index 502a5ea1c51e3a..4f1f593c7410bd 100755 --- a/t/t1060-object-corruption.sh +++ b/t/t1060-object-corruption.sh @@ -3,6 +3,7 @@ test_description='see how we handle various forms of corruption' . ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-diff-data.sh # convert "1234abcd" to ".git/objects/12/34abcd" obj_to_file() { @@ -62,6 +63,35 @@ test_expect_success 'streaming a corrupt blob fails' ' ) ' +test_expect_success PERL 'truncated objects can be re-retrieved via GVFS' ' + git init truncated && + COPYING_test_data >truncated/COPYING && + git -C truncated add COPYING && + test_tick && + git -C truncated commit -m initial COPYING && + + # set up the `read-object` hook so that it overwrites the corrupt object + mkdir -p truncated/.git/hooks && + sed -e "1s|/usr/bin/perl|$PERL_PATH|" \ + -e "s/system/unlink \".git\/objects\/\" . substr(\$sha1, 0, 2) . \"\/\" . substr(\$sha1, 2); &/" \ + <$TEST_DIRECTORY/t0410/read-object \ + >truncated/.git/hooks/read-object && + chmod +x truncated/.git/hooks/read-object && + + # ensure that the parent repository has a copy of the object, from + # where the `read-object` can read it + sha="$(git hash-object -w truncated/COPYING)" && + file=$(obj_to_file $sha) && + size=$(test_file_size $file) && + chmod u+w truncated/$file && + test-tool truncate truncated/$file $(($size-8)) && + + rm truncated/COPYING && + test_must_fail git -C truncated reset --hard && + git -C truncated -c core.gvfs=4 -c core.virtualizeObjects \ + reset --hard +' + test_expect_success 'getting type of a corrupt blob fails' ' ( cd bit-error &&