diff --git a/Makefile.PL b/Makefile.PL index 641ddffbd..e8d69dafb 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -22,11 +22,17 @@ if ($ENV{AUTOMATED_TESTING}) { if $is_njh; } +use File::ShareDir::Install; +$File::ShareDir::Install::INCLUDE_DOTFILES = 1; +$File::ShareDir::Install::INCLUDE_DOTDIRS = 1; +install_share dist => "share"; + my %WriteMakefileArgs = ( "ABSTRACT" => "Top-level test harness service.", "AUTHOR" => "Chad Granum ", "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => 0, + "File::ShareDir::Install" => "0.06", "Test2::Event::V2" => "1.302199", "Test2::Util::Term" => "0.000159" }, @@ -274,3 +280,9 @@ delete $WriteMakefileArgs{CONFIGURE_REQUIRES} unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; WriteMakefile(%WriteMakefileArgs); + +{ +package +MY; +use File::ShareDir::Install qw(postamble); +} diff --git a/cpanfile b/cpanfile index e88a329bc..8baa234dd 100644 --- a/cpanfile +++ b/cpanfile @@ -137,6 +137,7 @@ on 'test' => sub { on 'configure' => sub { requires "ExtUtils::MakeMaker" => "0"; + requires "File::ShareDir::Install" => "0.06"; requires "Test2::Event::V2" => "1.302199"; requires "Test2::Util::Term" => "0.000159"; }; diff --git a/lib/App/Yath2/Command/speedtag.pm b/lib/App/Yath2/Command/speedtag.pm index e3f9bd2cb..b48401629 100644 --- a/lib/App/Yath2/Command/speedtag.pm +++ b/lib/App/Yath2/Command/speedtag.pm @@ -105,16 +105,6 @@ sub run { die "max medium duration must be an integer, got '$self->{+MAX_MEDIUM}'\n" unless $self->{+MAX_MEDIUM} && $self->{+MAX_MEDIUM} =~ m/^\d+$/; - require App::Yath2::LogArchive; - my @runs = App::Yath2::LogArchive->new(path => $log)->runs; - die "No runs found in '$log'\n" unless @runs; - - my $streamer = App::Yath2::Streamer::Static->new( - log => $log, - runs => [@runs], - global => 1, - ); - my $durations_file = $settings->speedtag->generate_durations_file; my %durations; @@ -123,98 +113,108 @@ sub run { # tag and emit the moment we have both. my %job_state; my %tagged; - $streamer->stream( - callback => sub { - my ($event) = @_; - my $f = $event->facet_data // {}; - - if (my $start = $f->{harness_job_start}) { - my $jid = $start->{job_id} // $event->{job_id} // return; - $job_state{$jid}{start} //= $start->{stamp} // $event->stamp; - $job_state{$jid}{file} //= $start->{abs_file} // $start->{file}; - return; + my $callback = sub { + my ($event) = @_; + my $f = $event->facet_data // {}; + + if (my $start = $f->{harness_job_start}) { + my $jid = $start->{job_id} // $event->{job_id} // return; + $job_state{$jid}{start} //= $start->{stamp} // $event->stamp; + $job_state{$jid}{file} //= $start->{abs_file} // $start->{file}; + return; + } + + return unless my $end = $f->{harness_job_end}; + + my $jid = $end->{job_id} // $event->{job_id} // return; + my $file = $end->{abs_file} // $end->{file} // $job_state{$jid}{file}; + return unless $file; + + $file = clean_path($file); + return if $tagged{$file}++; + + my $start = $job_state{$jid}{start}; + my $stop = $end->{stamp} // $event->stamp; + return unless defined $start && defined $stop; + + my $time = $stop - $start; + return unless $time > 0; + + my $dur = + $time < $self->{+MAX_SHORT} ? 'short' + : $time < $self->{+MAX_MEDIUM} ? 'medium' + : 'long'; + + my $rfh; + unless (open($rfh, '<', $file)) { + warn "Could not open file $file for reading\n"; + return; + } + + my @lines; + my $injected; + my ($old, $new); + for my $line (<$rfh>) { + if ($line =~ m/^(\s*)#(\s*)HARNESS-(CAT(EGORY)?|DUR(ATION))-(LONG|MEDIUM|SHORT)$/i) { + next if $injected++; + $old = $line; + $line = "${1}#${2}HARNESS-DURATION-" . uc($dur) . "\n"; + $new = $line; } - - return unless my $end = $f->{harness_job_end}; - - my $jid = $end->{job_id} // $event->{job_id} // return; - my $file = $end->{abs_file} // $end->{file} // $job_state{$jid}{file}; - return unless $file; - - $file = clean_path($file); - return if $tagged{$file}++; - - my $start = $job_state{$jid}{start}; - my $stop = $end->{stamp} // $event->stamp; - return unless defined $start && defined $stop; - - my $time = $stop - $start; - return unless $time > 0; - - my $dur = - $time < $self->{+MAX_SHORT} ? 'short' - : $time < $self->{+MAX_MEDIUM} ? 'medium' - : 'long'; - - my $rfh; - unless (open($rfh, '<', $file)) { - warn "Could not open file $file for reading\n"; - return; + push @lines => $line; + } + close($rfh); + + unless ($injected) { + my $new_line = "# HARNESS-DURATION-" . uc($dur) . "\n"; + my @header; + while (@lines && $lines[0] =~ m/^(#|use\s|package\s)/) { + push @header => shift @lines; } + unshift @lines => (@header, $new_line); + + $old = ""; + $new = $new_line; + } + + if ($durations_file) { + my $tfile = $file; + $tfile =~ s{^\Q$initial_dir\E/+}{}; + $durations{$tfile} = uc($dur); + } + + if ($settings->harness->dummy) { + print "Would tag (dummy) file $file with duration '$dur'\n"; + chomp($old); + chomp($new); + print "Old Header: $old\nNew Header: $new\n\n"; + return; + } + + my $wfh; + unless (open($wfh, '>', $file)) { + warn "Could not open file $file for writing\n"; + return; + } + + print $wfh @lines; + close($wfh); + + print "Tagged '$dur': $file\n"; + }; - my @lines; - my $injected; - my ($old, $new); - for my $line (<$rfh>) { - if ($line =~ m/^(\s*)#(\s*)HARNESS-(CAT(EGORY)?|DUR(ATION))-(LONG|MEDIUM|SHORT)$/i) { - next if $injected++; - $old = $line; - $line = "${1}#${2}HARNESS-DURATION-" . uc($dur) . "\n"; - $new = $line; - } - push @lines => $line; - } - close($rfh); - - unless ($injected) { - my $new_line = "# HARNESS-DURATION-" . uc($dur) . "\n"; - my @header; - while (@lines && $lines[0] =~ m/^(#|use\s|package\s)/) { - push @header => shift @lines; - } - unshift @lines => (@header, $new_line); - - $old = ""; - $new = $new_line; - } - - if ($durations_file) { - my $tfile = $file; - $tfile =~ s{^\Q$initial_dir\E/+}{}; - $durations{$tfile} = uc($dur); - } - - if ($settings->harness->dummy) { - print "Would tag (dummy) file $file with duration '$dur'\n"; - chomp($old); - chomp($new); - print "Old Header: $old\nNew Header: $new\n\n"; - return; - } - - my $wfh; - unless (open($wfh, '>', $file)) { - warn "Could not open file $file for writing\n"; - return; - } - - print $wfh @lines; - close($wfh); + require App::Yath2::LogArchive; + my @runs = App::Yath2::LogArchive->new(path => $log)->runs; + die "No runs found in '$log'\n" unless @runs; - print "Tagged '$dur': $file\n"; - }, + my $streamer = App::Yath2::Streamer::Static->new( + log => $log, + runs => \@runs, + global => 1, ); + $streamer->stream(callback => $callback); + if ($durations_file) { my $jfile = Test2::Harness2::Util::File::JSON->new( name => $durations_file, diff --git a/lib/App/Yath2/LogArchive.pm b/lib/App/Yath2/LogArchive.pm index 0b002f3d4..8a68c09b2 100644 --- a/lib/App/Yath2/LogArchive.pm +++ b/lib/App/Yath2/LogArchive.pm @@ -57,7 +57,9 @@ sub artifacts { my $bytes = do { local $/; <$fh> }; close $fh; - my $dict_bytes = $self->can('dict_bytes') ? $self->dict_bytes : undef; + # dict_bytes is a Role::Source contract method; every backend + # provides it (returns undef when no dict is bundled). + my $dict_bytes = $self->dict_bytes; my $json = decompress_blob( $bytes, ($dict_bytes ? (dict_bytes => $dict_bytes) : ()), diff --git a/lib/App/Yath2/LogArchive/Directory.pm b/lib/App/Yath2/LogArchive/Directory.pm index 5fff2c3c8..8e0ab92b3 100644 --- a/lib/App/Yath2/LogArchive/Directory.pm +++ b/lib/App/Yath2/LogArchive/Directory.pm @@ -10,7 +10,8 @@ use Role::Tiny::With; use parent 'App::Yath2::LogArchive'; use Object::HashBase qw/path format/; -with 'App::Yath2::LogArchive::Role::Source'; +with 'App::Yath2::LogArchive::Role::Source', + 'App::Yath2::LogArchive::Role::DiskDict'; sub viable { 1 } diff --git a/lib/App/Yath2/LogArchive/Role/DiskDict.pm b/lib/App/Yath2/LogArchive/Role/DiskDict.pm new file mode 100644 index 000000000..9d79f39fe --- /dev/null +++ b/lib/App/Yath2/LogArchive/Role/DiskDict.pm @@ -0,0 +1,83 @@ +package App::Yath2::LogArchive::Role::DiskDict; +use strict; +use warnings; + +our $VERSION = '2.000011'; + +use Carp qw/croak/; +use File::Spec (); + +use Role::Tiny; + +# Concrete `dict_bytes` implementation for any LogArchive backend +# whose storage shape is "an on-disk root carrying a sibling +# zstd-dict.bin file". Composes the Role::Source dict_bytes contract +# (returns the dict bytes, or undef when none). +# +# Implementors must expose a `path` accessor (e.g. via +# Object::HashBase) returning the root directory; the role reads +# `/zstd-dict.bin` in binary mode -- Test2::Harness2::Util:: +# read_file goes through open_file which does not call binmode, and +# a CRLF-translating environment would corrupt the dict mid-read. +requires 'path'; + +sub dict_bytes { + my $self = shift; + my $abs = File::Spec->catfile($self->path, 'zstd-dict.bin'); + return undef unless -f $abs; + + open(my $fh, '<', $abs) or croak "open '$abs': $!"; + binmode $fh; + local $/; + my $bytes = <$fh>; + close $fh; + return $bytes; +} + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +App::Yath2::LogArchive::Role::DiskDict - Sibling-file +implementation of the L +C contract. + +=head1 SYNOPSIS + + package App::Yath2::LogArchive::Directory; + use parent 'App::Yath2::LogArchive'; + use Object::HashBase qw/path format/; + + with 'App::Yath2::LogArchive::Role::Source', + 'App::Yath2::LogArchive::Role::DiskDict'; + +=head1 DESCRIPTION + +Provides a single C method that reads +CpathE/zstd-dict.bin> from disk in binary mode and +returns its bytes (or C when the file does not exist). + +Implementors must expose a C accessor returning the root +directory the dict file lives next to; everything else is the +role's responsibility. + +This is the right shape for any backend whose underlying storage +is a real directory tree -- the live workdir layout +(L), an extracted archive, an +NFS-mounted snapshot, etc. Backends whose dict lives I a +single file (e.g. L, which +reads the dict at an offset within the archive) implement +C themselves. + +=head1 SOURCE + +The source code repository for Test2-Harness can be found at +L. + +=cut diff --git a/lib/App/Yath2/LogArchive/Role/Source.pm b/lib/App/Yath2/LogArchive/Role/Source.pm index 2a3c7f35e..c5bb5d469 100644 --- a/lib/App/Yath2/LogArchive/Role/Source.pm +++ b/lib/App/Yath2/LogArchive/Role/Source.pm @@ -3,6 +3,6 @@ use strict; use warnings; use Role::Tiny; -requires qw/read_file has_file list_files close viable/; +requires qw/read_file has_file list_files close viable dict_bytes/; 1; diff --git a/lib/App/Yath2/LogArchive/TarZIdx.pm b/lib/App/Yath2/LogArchive/TarZIdx.pm index cf08c87b9..e44805960 100644 --- a/lib/App/Yath2/LogArchive/TarZIdx.pm +++ b/lib/App/Yath2/LogArchive/TarZIdx.pm @@ -171,6 +171,7 @@ sub has_file { return exists $self->_build_index->{$rel} ? 1 : 0; } +# Role::Source dict_bytes contract for the tar.zidx backend. # Returns the bytes of the bundled zstd dictionary (the file the # writer copied out of $source/zstd-dict.bin) or undef when the # archive is dict-less. Probes the index once and caches the result; diff --git a/lib/App/Yath2/Streamer/Static.pm b/lib/App/Yath2/Streamer/Static.pm index 99afeeda5..912cb4226 100644 --- a/lib/App/Yath2/Streamer/Static.pm +++ b/lib/App/Yath2/Streamer/Static.pm @@ -113,8 +113,7 @@ sub _resolve_path { return undef unless $archive->has_file($rel); - my $tmpdir = $self->{+ARCHIVE_TMPDIR} //= - tempdir('yath-streamer-XXXXXX', TMPDIR => 1, CLEANUP => 1); + my $tmpdir = $self->{+ARCHIVE_TMPDIR} //= $self->_make_archive_tempdir($archive); my $abs = "$tmpdir/$rel"; my $dir = dirname($abs); @@ -134,6 +133,36 @@ sub _resolve_path { return $self->{+ARCHIVE_EXTRACTED}->{$rel} = $abs; } +# Vivify the per-streamer extraction tempdir on first call. +sub _make_archive_tempdir { + my ($self, $archive) = @_; + my $dir = tempdir('yath-streamer-XXXXXX', TMPDIR => 1, CLEANUP => 1); + $self->_extract_archive_dict($archive, $dir); + return $dir; +} + +# Copy the archive's bundled zstd dictionary to <$dir>/zstd-dict.bin. +# Per-logger log_reader()s discover the dict by walking up from each +# .zst snapshot's directory looking for that exact filename, so +# without this every dict-compressed snapshot extracted from the +# archive croaks with "Dictionary mismatch" when read back. No-op +# when the archive carries no dict (Role::Source dict_bytes returns +# undef). Binary write -- Test2::Harness2::Util's file helpers do +# not call binmode. +sub _extract_archive_dict { + my ($self, $archive, $dir) = @_; + + my $bytes = $archive->dict_bytes; + return unless defined $bytes && length $bytes; + + my $path = "$dir/zstd-dict.bin"; + open(my $fh, '>', $path) or croak "open '$path' for write: $!"; + binmode $fh; + print {$fh} $bytes or do { close $fh; croak "write '$path': $!" }; + close $fh or croak "close '$path': $!"; + return; +} + # Collect the state snapshot for a run by asking every logger whose # records_state() is true for its fetch_state. Reconcile across # loggers: cared-about fields must agree when both loggers set them. diff --git a/t/AI/unit/LogArchive/Roles.t b/t/AI/unit/LogArchive/Roles.t index f6d8bc87a..64f81353c 100644 --- a/t/AI/unit/LogArchive/Roles.t +++ b/t/AI/unit/LogArchive/Roles.t @@ -10,6 +10,7 @@ sub has_file { die 'nope' } sub list_files { die 'nope' } sub close { die 'nope' } sub viable { 1 } +sub dict_bytes { undef } package Fake::Writer; use Role::Tiny::With; diff --git a/t/Yath/integration/speedtag.t b/t/Yath/integration/speedtag.t index 94cc8edcb..2f35508fb 100644 --- a/t/Yath/integration/speedtag.t +++ b/t/Yath/integration/speedtag.t @@ -6,49 +6,137 @@ use File::Temp qw/tempdir/; use File::Spec; use File::Copy qw/copy/; -use Test2::Harness2::Util::File::JSONL; - use lib 't/lib'; use Test2::Harness2::Test::Yath qw/yath/; use App::Yath2::Util qw/find_yath/; find_yath(); # cache result before we chdir -my $tmp = tempdir(CLEANUP => 1); - -my $dir = __FILE__; -$dir =~ s{\.t$}{}g; -$dir =~ s{^\./}{}; - -my $pass = File::Spec->catfile($tmp, 'pass.tx'); -my $pass2 = File::Spec->catfile($tmp, 'pass2.tx'); - -copy(File::Spec->catfile($dir, 'pass.tx'), $pass); -copy(File::Spec->catfile($dir, 'pass2.tx'), $pass2); - -my $out = yath(command => 'test', args => [$tmp, '--ext=tx'], log => 1, exit => 0); -my $log = $out->{log}->name; - -yath( - command => 'speedtag', - args => [$log], - exit => 0, - test => sub { - like($_, qr/Tagged .*pass\.tx/, "Indicate we tagged pass"); - like($_, qr/Tagged .*pass2\.tx/, "Indicate we tagged pass2"); - - for my $file ($pass, $pass2) { - open(my $fh, '<', $file) or die $!; - my $found = 0; - while (my $line = <$fh>) { - chomp($line); - next unless $line =~ m/^#\s*HARNESS-DURATION-(SHORT|MEDIUM|LONG)$/; - $found = 1; - last; - } - $file =~ s/^.*(pass\d?\.tx)$/$1/; - ok($found, "Tagged file $file"); +# Each scenario walks `yath speedtag` over a different shape of log +# input. They cover the delta this branch landed on top of origin/2.0: +# +# * Command::speedtag drives every input through +# Streamer::Static -- LogArchive recognises both directories +# and .yath archives, so the dispatch is just "open the path". +# * Streamer::Static drops the archive's bundled zstd-dict at +# the extraction tempdir root (covered indirectly by every +# archive scenario when the harness picks up a dict from +# share). +# * App::Yath2::LogArchive::Directory exposes dict_bytes (via +# Role::DiskDict, covered by the directory-input scenario +# that uses `yath extract`). + +my $fixtures = __FILE__; +$fixtures =~ s{\.t$}{}g; +$fixtures =~ s{^\./}{}; + +# Copy pass.tx + pass2.tx into a fresh tempdir so each scenario gets +# untagged inputs (speedtag mutates the .tx files in place). +sub fresh_fixtures { + my $tmp = tempdir(CLEANUP => 1); + my $pass = File::Spec->catfile($tmp, 'pass.tx'); + my $pass2 = File::Spec->catfile($tmp, 'pass2.tx'); + copy(File::Spec->catfile($fixtures, 'pass.tx'), $pass); + copy(File::Spec->catfile($fixtures, 'pass2.tx'), $pass2); + return ($tmp, $pass, $pass2); +} + +# Verify each fixture file ends up with a HARNESS-DURATION-* header. +sub assert_tagged { + my ($label, @files) = @_; + for my $file (@files) { + open(my $fh, '<', $file) or die "open '$file': $!"; + my $found = 0; + while (my $line = <$fh>) { + chomp $line; + next unless $line =~ m/^#\s*HARNESS-DURATION-(SHORT|MEDIUM|LONG)$/; + $found = 1; + last; } - }, -); + close $fh; + my ($base) = $file =~ m{(pass\d?\.tx)$}; + ok($found, "$label: tagged $base"); + } +} + +subtest 'speedtag from the archive path Tester `log => 1` produces' => sub { + my ($tmp, $pass, $pass2) = fresh_fixtures(); + + my $out = yath(command => 'test', args => [$tmp, '--ext=tx'], log => 1, exit => 0); + my $log = $out->{log}->name; + + yath( + command => 'speedtag', + args => [$log], + exit => 0, + test => sub { + like($_, qr/Tagged .*pass\.tx/, 'announced pass.tx'); + like($_, qr/Tagged .*pass2\.tx/, 'announced pass2.tx'); + assert_tagged('Tester log => 1', $pass, $pass2); + }, + ); +}; + +subtest 'speedtag from an explicit *.yath archive path' => sub { + my ($tmp, $pass, $pass2) = fresh_fixtures(); + my $archive = File::Spec->catfile($tmp, 'run.yath'); + + yath( + command => 'test', + args => [$tmp, '--ext=tx', "--log-file=$archive"], + exit => 0, + ); + ok(-f $archive, '--log-file produced an archive at the requested path'); + + yath( + command => 'speedtag', + args => [$archive], + exit => 0, + test => sub { + like($_, qr/Tagged .*pass\.tx/, 'announced pass.tx'); + like($_, qr/Tagged .*pass2\.tx/, 'announced pass2.tx'); + assert_tagged('yath archive', $pass, $pass2); + }, + ); +}; + +subtest 'speedtag from an extracted live $logdir directory' => sub { + my ($tmp, $pass, $pass2) = fresh_fixtures(); + my $archive = File::Spec->catfile($tmp, 'run.yath'); + my $logdir = File::Spec->catdir($tmp, 'extracted'); + + yath( + command => 'test', + args => [$tmp, '--ext=tx', "--log-file=$archive"], + exit => 0, + ); + yath(command => 'extract', args => [$archive, $logdir], exit => 0); + ok(-d $logdir, 'extract produced a directory'); + + yath( + command => 'speedtag', + args => [$logdir], + exit => 0, + test => sub { + like($_, qr/Tagged .*pass\.tx/, 'announced pass.tx'); + like($_, qr/Tagged .*pass2\.tx/, 'announced pass2.tx'); + assert_tagged('directory backend', $pass, $pass2); + }, + ); +}; + +subtest 'speedtag errors cleanly on a missing log path' => sub { + my $tmp = tempdir(CLEANUP => 1); + my $missing = File::Spec->catfile($tmp, 'no-such.yath'); + ok(!-e $missing, 'precondition: log path does not exist'); + + yath( + command => 'speedtag', + args => [$missing], + exit => T(), + test => sub { + like($_, qr{Log source.*does not exist}, 'reported a missing-source error'); + }, + ); +}; done_testing;