From cdd38f55b5791429c4faa4bca3474792f79a08e2 Mon Sep 17 00:00:00 2001
From: Baptiste Langlade
Date: Wed, 16 Apr 2025 19:42:36 +0200
Subject: [PATCH] add regression case due to invalid utf8 content

---
Review notes (free text in this section is ignored by `git am`):

- proofs/tar.php: adds a proof that encodes a single file named '._+',
  writes the archive as 'shape.tar' into the temp filesystem, extracts it
  with the system `tar`, and compares the extracted content with the
  original content. The proof is tagged `\Innmind\BlackBox\Tag::wip`.
- The `tar` exit-code assertion is commented out, so the only assertion
  is the content comparison (which compares against `null` when the
  extracted file is not found).
- src/Tar/Encode.php: inserts a `\mb_convert_encoding($value, 'UTF-8')`
  mapping over each chunk, after the existing `toEncoding(Str\Encoding::ascii)`
  step and before the chunks are aggregated into 512-byte blocks.
  NOTE(review): no source encoding is passed to `mb_convert_encoding`;
  confirm the conversion cannot change the byte length of the content.

 proofs/tar.php     | 36 ++++++++++++++++++++++++++++++++++++
 src/Tar/Encode.php |  3 +++
 2 files changed, 39 insertions(+)

diff --git a/proofs/tar.php b/proofs/tar.php
index 00ecf9c..808012f 100644
--- a/proofs/tar.php
+++ b/proofs/tar.php
@@ -270,4 +270,40 @@ static function($assert, $file) {
             );
         },
     );
+
+    yield test(
+        'Tar encode files with utf8 content',
+        static function($assert) {
+            $clock = Clock::live();
+            $path = \rtrim(\sys_get_temp_dir(), '/').'/innmind/encoding/';
+            $tmp = Filesystem::mount(Path::of($path));
+
+            $file = File::named(
+                '._+',
+                File\Content::ofString('›'),
+            );
+
+            // make sure to avoid conflicts when trying to unarchive
+            $tmp->remove($file->name());
+
+            $tar = Tar::encode($clock)($file);
+            $tar = File::named('shape.tar', $tar);
+            $tmp->add($tar);
+
+            $exitCode = null;
+            \exec("tar -xf '$path/shape.tar' --directory=$path", result_code: $exitCode);
+            // $assert->same(0, $exitCode);
+
+            $assert->same(
+                $file->content()->toString(),
+                $tmp
+                    ->get($file->name())
+                    ->keep(Instance::of(File::class))
+                    ->match(
+                        static fn($file) => $file->content()->toString(),
+                        static fn() => null,
+                    ),
+            );
+        },
+    )->tag(\Innmind\BlackBox\Tag::wip);
 };
diff --git a/src/Tar/Encode.php b/src/Tar/Encode.php
index 81fc496..56bd22c 100644
--- a/src/Tar/Encode.php
+++ b/src/Tar/Encode.php
@@ -112,6 +112,9 @@ private function encodeFile(Str $path, File $file): Sequence
             ->content()
             ->chunks()
             ->map(static fn($chunk) => $chunk->toEncoding(Str\Encoding::ascii))
+            ->map(static fn($chunk) => $chunk->map(
+                static fn($value) => \mb_convert_encoding($value, 'UTF-8'),
+            ))
             ->aggregate(static fn(Str $a, Str $b) => $a->append($b)->chunk(512))
             ->flatMap(static fn($str) => $str->chunk(512)) // in case there is only one line
             ->map(static fn($chunk) => \pack('a512', $chunk->toString()))