Store the newest relevant file mtime of a folder on folder_diff.mtime_latest
at mirror-scan time, and let mirrors_query read that column for folder-level
requests instead of joining and aggregating over the file table.

Only files whose name contains a digit are considered, and names matching the
exclusion pattern in _doscan are skipped because their timestamp is not a
reliable freshness indicator.

diff --git a/lib/MirrorCache/Schema/Result/FolderDiff.pm b/lib/MirrorCache/Schema/Result/FolderDiff.pm
index 886886a0..7d18a7f0 100644
--- a/lib/MirrorCache/Schema/Result/FolderDiff.pm
+++ b/lib/MirrorCache/Schema/Result/FolderDiff.pm
@@ -61,6 +61,8 @@ __PACKAGE__->add_columns(
     data_type => 'timestamp',
     is_nullable => 0
   },
+  "mtime_latest",
+  { data_type => 'bigint', is_nullable => 1 },
   "realfolder_id",
   { data_type => "bigint", is_foreign_key => 1, is_nullable => 1 },
 );
diff --git a/lib/MirrorCache/Schema/ResultSet/Server.pm b/lib/MirrorCache/Schema/ResultSet/Server.pm
index a91d4788..82c0bb08 100644
--- a/lib/MirrorCache/Schema/ResultSet/Server.pm
+++ b/lib/MirrorCache/Schema/ResultSet/Server.pm
@@ -105,18 +105,17 @@ sub mirrors_query {
         $condition_server_project = "and sp.server_id IS NULL";
     }
 
-    my $join_file_cond = "fl.folder_id = fd.folder_id";
-    my $folder_cond = "fd.folder_id in (coalesce((select id from folder where path = concat(?::text,'/repodata')),?),?) and (fdf.file_id is NULL and fl.folder_id in (coalesce((select id from folder where path = concat(?::text,'/repodata')),?),?))";
-    my $where_recent = "where s.mtime > 0";
-    # license.tar* and info.xml* might be kept with the same name through updates, so timestamp on them is unreliable in mirrorlist for folders
-    my $file_dt = ", max(case when fdf.file_id is null and fl.name ~ '[0-9]' and fl.name not like '%license.tar.%' and fl.name not like '%info.xml.%' and fl.name not like '%.asc' and fl.name not like '%.txt' and fl.name not like '%/' and fl.name not like 'yast2%' and fl.name not like '%.pf2' and fl.name not like '%patterns.xml.zst' and fl.name not like '%data%xml%' and fl.name not like '%appdata-icons%' then fl.mtime else null end) as mtime";
-    my $group_by = "group by s.id, s.hostname, s.hostname_vpn, s.urldir, s.region, s.country, s.lat, s.lng, s.score, fd.folder_id";
+    my $join_file = "";
+    my $join_folder_diff_file = "";
+    my $folder_cond = "fd.folder_id in (coalesce((select id from folder where path = concat(?::text,'/repodata')),?),?)";
+    my $file_dt = ", fd.mtime_latest as mtime";
+    my $fl_relevant = "";
 
     if ($file_id) {
-        $join_file_cond = "fl.id = ?";
+        $join_file = "join file fl on fl.id = ?";
+        $join_folder_diff_file = "left join folder_diff_file fdf on fdf.file_id = fl.id and fdf.folder_diff_id = fd.id";
         $file_dt = ", fl.mtime as mtime";
-        $group_by = "";
-        $where_recent = "";
+        $fl_relevant = "and date_trunc('second', fl.dt) <= coalesce(fds.dt, fl.dt)";
         $folder_cond = "fd.folder_id in (?,?) and coalesce(fd.realfolder_id, ?) = ? and (fdf.file_id is NULL and fl.folder_id = ?)";
     }
 
@@ -154,11 +153,11 @@ CASE WHEN COALESCE(stability_ipv.rating, 0) > 0 THEN stability_ipv.rating
 from (
     select s.id, s.hostname, s.hostname_vpn, s.urldir, s.region, s.country, s.lat, s.lng, s.score $file_dt, fd.folder_id
     from folder_diff fd
-    join file fl on $join_file_cond
-    join folder_diff_server fds on fd.id = fds.folder_diff_id and date_trunc('second', fl.dt) <= coalesce(fds.dt, fl.dt)
+    $join_file
+    join folder_diff_server fds on fd.id = fds.folder_diff_id $fl_relevant
     join server s on fds.server_id = s.id and s.enabled $country_condition $condition_our_regions
     left join server_capability_declaration scd on s.id = scd.server_id and scd.capability = 'country'
-    left join folder_diff_file fdf on fdf.file_id = fl.id and fdf.folder_diff_id = fd.id
+    $join_folder_diff_file
     $join_server_project
     where $folder_cond $condition_server_project
     and ( -- here mirrors may be declared to handle only specific countries
@@ -170,7 +169,6 @@ from (
         ( not scd.enabled and not ? ~ scd.extra)
         )
     )
-    $group_by
 ) s
 join folder f on f.id = s.folder_id
 left join server_capability_declaration scd on s.id = scd.server_id and scd.capability = '$capability' and NOT scd.enabled
@@ -181,7 +179,6 @@ left join server_stability stability_scheme on s.id = stability_scheme.server_i
 left join server_stability stability_schemex on s.id = stability_schemex.server_id and stability_schemex.capability = '$capabilityx'
 left join server_stability stability_ipv on s.id = stability_ipv.server_id and stability_ipv.capability = '$ipv'
 left join server_stability stability_ipvx on s.id = stability_ipvx.server_id and stability_ipvx.capability = '$ipvx'
-$where_recent
 ) x
 WHERE not_disabled $extra
 order by rating_country desc, (dist/100)::int, support_scheme desc, rating_scheme desc, support_ipv desc, rating_ipv desc, score, realfolderscore, random()
@@ -203,7 +200,7 @@ END_SQL
     if ($file_id) {
         $prep->execute($file_id, @country_params, $realfolder_id, $folder_id, $realfolder_id, $realfolder_id, $realfolder_id, $country, $country, $country);
     } else {
-        $prep->execute(@country_params, $path, $realfolder_id, $folder_id, $path, $realfolder_id, $folder_id, $country, $country, $country);
+        $prep->execute(@country_params, $path, $realfolder_id, $folder_id, $country, $country, $country);
     }
     my $server_arrayref = $dbh->selectall_arrayref($prep, { Slice => {} });
     return $server_arrayref;
diff --git a/lib/MirrorCache/Task/MirrorScan.pm b/lib/MirrorCache/Task/MirrorScan.pm
index 6e294655..c2bf80ae 100644
--- a/lib/MirrorCache/Task/MirrorScan.pm
+++ b/lib/MirrorCache/Task/MirrorScan.pm
@@ -47,17 +47,17 @@ sub _scan {
       unless my $guard = $minion->guard('mirror_scan' . $path, 20*60);
 
     $job->note($path => 1);
-    my ($folder_id, $realfolder_id, $anotherpath, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes);
+    my ($folder_id, $realfolder_id, $anotherpath, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes, $dbfilemtimes);
     {
         return $job->finish('folder sync job is still active')
           unless my $guard_r = $minion->guard('folder_sync' . $path, 360);
 
-        ($folder_id, $realfolder_id, $anotherpath, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes)
+        ($folder_id, $realfolder_id, $anotherpath, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes, $dbfilemtimes)
             = _dbfiles($app, $job, $path);
     }
     return undef unless $dbfiles;
 
-    my $count = _doscan($app, $job, $path, $realfolder_id, $folder_id, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes);
+    my $count = _doscan($app, $job, $path, $realfolder_id, $folder_id, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes, $dbfilemtimes);
     $job->note($count => 1);
     return $job->finish;
 }
@@ -88,6 +88,7 @@ sub _dbfiles {
     my @dbfiles = ();
     my %dbfileids = ();
     my %dbfileprefixes = ();
+    my %dbfilemtimes = ();
     my $max_dt = 0;
     for my $file ($schema->resultset('File')->search({folder_id => $folder_id})) {
         my $basename = $file->name;
@@ -96,14 +97,15 @@ sub _dbfiles {
         $dbfileprefixes{_reliable_prefix($basename)} = 1;
         push @dbfiles, $basename;
         $dbfileids{$basename} = $file->id;
+        $dbfilemtimes{$basename} = $file->mtime;
         $max_dt = $file->dt if !$max_dt || ( 0 > DateTime->compare($max_dt, $file->dt) );
     }
     @dbfiles = sort @dbfiles;
-    return $folder->id, $folder_id, $realpath, $latestdt, $max_dt, \@dbfiles, \%dbfileids, \%dbfileprefixes;
+    return $folder->id, $folder_id, $realpath, $latestdt, $max_dt, \@dbfiles, \%dbfileids, \%dbfileprefixes, \%dbfilemtimes;
 }
 
 sub _doscan {
-    my ($app, $job, $path, $realfolder_id, $folder_id, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes) = @_;
+    my ($app, $job, $path, $realfolder_id, $folder_id, $latestdt, $max_dt, $dbfiles, $dbfileids, $dbfileprefixes, $dbfilemtimes) = @_;
     my @dbfiles = @$dbfiles;
     my %dbfileids = %$dbfileids;
     my %dbfileprefixes = %$dbfileprefixes;
@@ -216,8 +218,20 @@ unless ($hasall) {
 }
         my $ctx = Digest::MD5->new;
         my @missing_files = ();
+        my $mtime_latest = 0;
         foreach my $file (@dbfiles) {
-            next if $mirrorfiles{$file} || substr($file,length($file)-1) eq '/' || $hasall;
+            next if substr($file,length($file)-1) eq '/';
+            if ($mirrorfiles{$file} || $hasall) {
+                # track mtime only for versioned files (name contains a digit)
+                next unless $file =~ m/[0-9]/;
+                # excluding some versioned files that change timestamp
+                next if $file =~ m/(license\.tar\.|info\.xml|\.asc$|\.txt|^yast2|\.pf2$|patterns\.xml\.zst$|data.*xml|appdata-icons)/;
+                if (my $mtime = $dbfilemtimes->{$file}) {
+                    $mtime_latest = $mtime if !$mtime_latest || $mtime_latest < $mtime;
+                }
+                next;
+            }
+            ;
             $ctx->add($file);
             push @missing_files, $dbfileids{$file};
         }
@@ -228,6 +242,7 @@ unless ($hasall) {
             $folder_diff = $schema->resultset('FolderDiff')->find_or_new({folder_id => $folder_id, hash => $digest});
             unless($folder_diff->in_storage) {
                 $folder_diff->dt($latestdt);
+                $folder_diff->mtime_latest($mtime_latest);
                 $folder_diff->realfolder_id($realfolder_id) if $realfolder_id && $realfolder_id != $folder_id;
                 $folder_diff->insert;
 
diff --git a/lib/MirrorCache/resources/migrations/Pg.sql b/lib/MirrorCache/resources/migrations/Pg.sql
index 90c5181f..45cce5e9 100644
--- a/lib/MirrorCache/resources/migrations/Pg.sql
+++ b/lib/MirrorCache/resources/migrations/Pg.sql
@@ -465,3 +465,5 @@ create index if not exists agg_download_pkg_period_metapkg_id_dt_idx on agg_down
 alter table project add column if not exists shard varchar(32);
 -- 46 up
 -- noop
+-- 47 up
+alter table folder_diff add column if not exists mtime_latest bigint;
diff --git a/lib/MirrorCache/resources/migrations/mysql.sql b/lib/MirrorCache/resources/migrations/mysql.sql
index adab65b3..8fe4e812 100644
--- a/lib/MirrorCache/resources/migrations/mysql.sql
+++ b/lib/MirrorCache/resources/migrations/mysql.sql
@@ -480,3 +480,5 @@ alter table project add column if not exists shard varchar(32);
 -- delete duplicate rows and add PR to server_project
 delete from server_project where (server_id, project_id, dt) in (select x.server_id, x.project_id, x.dt from server_project x join server_project y on (y.server_id, y.project_id) = (x.server_id, x.project_id) and x.dt < y.dt);
 alter table server_project add primary key if not exists (server_id, project_id);
+-- 47 up
+alter table folder_diff add column if not exists mtime_latest bigint;