diff --git a/mysql-test/suite/innodb/r/page_tracking_corruption.result b/mysql-test/suite/innodb/r/page_tracking_corruption.result
new file mode 100644
index 000000000000..0d9d69e30c10
--- /dev/null
+++ b/mysql-test/suite/innodb/r/page_tracking_corruption.result
@@ -0,0 +1,40 @@
+#
+# Enable page tracking
+#
+INSTALL COMPONENT 'file://component_mysqlbackup';
+SELECT mysqlbackup_page_track_set(1);
+mysqlbackup_page_track_set(1)
+#
+#
+# Generate data to create page tracking archive files
+#
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=InnoDB;
+UPDATE t1 SET b = REPEAT('y', 1000);
+#
+# Verify page tracking works before corruption
+#
+SELECT mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1);
+mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1)
+#
+#
+# Shutdown the server
+#
+#
+# Corrupt page tracking archive files
+#
+Page tracking archive files corrupted successfully.
+#
+# Restart the server — should succeed despite corrupted archive files
+#
+# restart
+#
+# Verify page tracking query does not crash after corruption
+#
+SELECT mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1);
+mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1)
+#
+#
+# Cleanup
+#
+DROP TABLE t1;
+UNINSTALL COMPONENT 'file://component_mysqlbackup';
diff --git a/mysql-test/suite/innodb/t/page_tracking_corruption.test b/mysql-test/suite/innodb/t/page_tracking_corruption.test
new file mode 100644
index 000000000000..9e4116489074
--- /dev/null
+++ b/mysql-test/suite/innodb/t/page_tracking_corruption.test
@@ -0,0 +1,121 @@
+# Test that the server starts despite corrupted page tracking archive files.
+#
+# When ib_page_* files in #ib_archive are corrupted (e.g. truncated),
+# the server should detect the problem, log a warning, and continue
+# starting up rather than crashing.
+#
+--source include/not_valgrind.inc
+
+--disable_query_log
+call mtr.add_suppression("Page Archiver's doublewrite buffer initialisation failed");
+call mtr.add_suppression("Page archiver system's recovery failed");
+call mtr.add_suppression("Operating system error number .* in a file operation");
+call mtr.add_suppression("Error number .* means");
+call mtr.add_suppression("'rmdir' returned OS error");
+call mtr.add_suppression("bytes should have been read. Only .* bytes read");
+call mtr.add_suppression("Retry attempts for reading partial data failed");
+--enable_query_log
+
+let MYSQLD_DATADIR = `SELECT @@datadir`;
+
+--echo #
+--echo # Enable page tracking
+--echo #
+INSTALL COMPONENT 'file://component_mysqlbackup';
+--replace_column 1 #
+SELECT mysqlbackup_page_track_set(1);
+
+--echo #
+--echo # Generate data to create page tracking archive files
+--echo #
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=InnoDB;
+
+--disable_query_log
+--let $i = 100
+while ($i)
+{
+  INSERT INTO t1 (b) VALUES (REPEAT('x', 1000));
+  --dec $i
+}
+--enable_query_log
+
+UPDATE t1 SET b = REPEAT('y', 1000);
+
+--echo #
+--echo # Verify page tracking works before corruption
+--echo #
+--replace_column 1 #
+SELECT mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1);
+
+--echo #
+--echo # Shutdown the server
+--echo #
+--source include/shutdown_mysqld.inc
+
+--echo #
+--echo # Corrupt page tracking archive files
+--echo #
+
+perl;
+use File::Find;
+use strict;
+use warnings;
+
+my $datadir = $ENV{'MYSQLD_DATADIR'};
+chomp $datadir if defined $datadir;
+die "MYSQLD_DATADIR not set\n" unless defined $datadir && length $datadir;
+
+$datadir .= '/' unless $datadir =~ m{/$};
+my $archive_dir = "${datadir}#ib_archive";
+
+die "Archive directory $archive_dir not found.\n" unless -d $archive_dir;
+
+my $corrupted = 0;
+
+find(sub {
+  # find() has chdir()'d into $File::Find::dir, so file operations use the
+  # basename in $_; $File::Find::name is kept for error messages only.
+
+  # Truncate the doublewrite buffer so it cannot restore corrupted blocks.
+  if (/^dblwr_\d+$/ && -f $_) {
+    truncate($_, 0) or die "Cannot truncate $File::Find::name: $!\n";
+  }
+
+  return unless /^ib_page_\d+$/;
+  my $path = $File::Find::name;
+  my $size = -s $_;
+  return unless defined $size && $size > 16384;
+
+  # Truncate just past the first block (the reset block). This preserves
+  # the valid reset entries that reference data blocks further in the file,
+  # but removes those data blocks. During recovery, fetch_reset_lsn() will
+  # try to read a referenced data block past the end of the file. Without
+  # the fix this hits an assertion (debug) or a fatal read error (release).
+  # The size must be > ARCH_PAGE_BLK_SIZE (16384) so that
+  # cleanup_if_required() does not delete the file.
+  truncate($_, 16385) or die "Cannot truncate $path: $!\n";
+
+  $corrupted++;
+}, $archive_dir);
+
+die "No ib_page_* files found under $archive_dir.\n" if $corrupted == 0;
+
+print "Page tracking archive files corrupted successfully.\n";
+EOF
+
+--echo #
+--echo # Restart the server — should succeed despite corrupted archive files
+--echo #
+--source include/start_mysqld.inc
+
+--echo #
+--echo # Verify page tracking query does not crash after corruption
+--echo #
+--replace_column 1 #
+SELECT mysqlbackup_page_track_get_changed_page_count(mysqlbackup_page_track_get_start_lsn(), mysqlbackup_page_track_get_start_lsn() + 1);
+
+--echo #
+--echo # Cleanup
+--echo #
+DROP TABLE t1;
+UNINSTALL COMPONENT 'file://component_mysqlbackup';
diff --git a/storage/innobase/arch/arch0arch.cc b/storage/innobase/arch/arch0arch.cc
index d0633e34f0ca..7f2c8aaf0740 100644
--- a/storage/innobase/arch/arch0arch.cc
+++ b/storage/innobase/arch/arch0arch.cc
@@ -454,17 +454,18 @@ dberr_t Arch_File_Ctx::open_next(lsn_t start_lsn, uint64_t file_offset,
 }
 
 dberr_t Arch_File_Ctx::read(byte *to_buffer, uint64_t offset, uint size) {
-  ut_ad(offset + size <= m_size);
   ut_ad(!is_closed());
 
+  if (offset + size > m_size) {
+    return DB_IO_ERROR;
+  }
+
   IORequest request(IORequest::READ);
   request.disable_compression();
   request.clear_encrypted();
 
-  auto err =
-      os_file_read(request, m_path_name, m_file, to_buffer, offset, size);
-
-  return (err);
+  return os_file_read_no_error_handling(request, m_path_name, m_file,
+                                        to_buffer, offset, size, nullptr);
 }
 
 dberr_t Arch_File_Ctx::resize_and_overwrite_with_zeros(uint64_t file_size) {
diff --git a/storage/innobase/arch/arch0page.cc b/storage/innobase/arch/arch0page.cc
index 42a9feb50400..c2ed791bfaac 100644
--- a/storage/innobase/arch/arch0page.cc
+++ b/storage/innobase/arch/arch0page.cc
@@ -599,8 +599,8 @@ bool Arch_File_Ctx::validate_stop_point_in_file(Arch_Group *group,
   byte buf[ARCH_PAGE_BLK_SIZE];
 
   /* Read the entire reset block. */
-  dberr_t err =
-      os_file_read(request, m_path_name, file, buf, offset, ARCH_PAGE_BLK_SIZE);
+  dberr_t err = os_file_read_no_error_handling(
+      request, m_path_name, file, buf, offset, ARCH_PAGE_BLK_SIZE, nullptr);
 
   if (err != DB_SUCCESS) {
     return (false);
@@ -628,8 +628,8 @@ bool Arch_File_Ctx::validate_reset_block_in_file(pfs_os_file_t file,
   byte buf[ARCH_PAGE_BLK_SIZE];
 
   /* Read the entire reset block. */
-  dberr_t err =
-      os_file_read(request, m_path_name, file, buf, 0, ARCH_PAGE_BLK_SIZE);
+  dberr_t err = os_file_read_no_error_handling(request, m_path_name, file, buf,
+                                               0, ARCH_PAGE_BLK_SIZE, nullptr);
 
   if (err != DB_SUCCESS) {
     return (false);
@@ -1718,7 +1718,7 @@ void Arch_Page_Sys::track_page(buf_page_t *bpage, lsn_t track_lsn,
       m_state = ARCH_STATE_ABORT;
       arch_oper_mutex_exit();
       ut_d(ut_error);
-      ut_o(return);
+      ut_o(return );
     }
 
     cur_blk = m_data.get_block(&m_write_pos, ARCH_DATA_BLOCK);
@@ -1898,14 +1898,19 @@ int Arch_Page_Sys::get_pages(MYSQL_THD thd, Page_Track_Callback cbk_func,
       auto data_len = Arch_Block::get_data_len(header_buf);
       bytes_left = data_len + ARCH_PAGE_BLK_HEADER_LENGTH;
 
-      ut_ad(bytes_left <= ARCH_PAGE_BLK_SIZE);
-      ut_ad(block_stop_lsn != LSN_MAX);
+      if (bytes_left > ARCH_PAGE_BLK_SIZE || block_stop_lsn == LSN_MAX) {
+        err = ER_PAGE_TRACKING_RANGE_NOT_TRACKED;
+        break;
+      }
 
       bytes_left -= cur_pos.m_offset;
 
       if (data_len == 0 || cur_pos.m_block_num == last_pos.m_block_num ||
           block_stop_lsn > stop_id) {
-        ut_ad(block_stop_lsn >= stop_id);
+        if (block_stop_lsn < stop_id) {
+          err = ER_PAGE_TRACKING_RANGE_NOT_TRACKED;
+          break;
+        }
         stop_id = block_stop_lsn;
         last_block = true;
       }
@@ -2938,8 +2943,8 @@ int Arch_Group::read_from_file(Arch_Page_Pos *read_pos, uint read_len,
   request.disable_compression();
   request.clear_encrypted();
 
-  auto db_err =
-      os_file_read(request, file_name, file, read_buff, offset, read_len);
+  auto db_err = os_file_read_no_error_handling(
+      request, file_name, file, read_buff, offset, read_len, nullptr);
 
   os_file_close(file);
 
@@ -3056,7 +3061,9 @@ int Arch_Page_Sys::fetch_group_within_lsn_range(lsn_t &start_id, lsn_t &stop_id,
   auto latest_stop_lsn = m_latest_stop_lsn;
   arch_oper_mutex_exit();
 
-  ut_ad(latest_stop_lsn != LSN_MAX);
+  if (latest_stop_lsn == LSN_MAX) {
+    return (ER_PAGE_TRACKING_RANGE_NOT_TRACKED);
+  }
 
   if (start_id == 0 || stop_id == 0) {
     if (m_current_group == nullptr || !m_current_group->is_active()) {
@@ -3065,7 +3072,9 @@ int Arch_Page_Sys::fetch_group_within_lsn_range(lsn_t &start_id, lsn_t &stop_id,
 
     *group = m_current_group;
 
-    ut_ad(m_last_lsn != LSN_MAX);
+    if (m_last_lsn == LSN_MAX) {
+      return (ER_PAGE_TRACKING_RANGE_NOT_TRACKED);
+    }
 
     start_id = (start_id == 0) ? m_last_lsn : start_id;
     stop_id = (stop_id == 0) ? latest_stop_lsn : stop_id;
diff --git a/storage/innobase/arch/arch0recv.cc b/storage/innobase/arch/arch0recv.cc
index fa3bd9b4796b..3938eaa3f605 100644
--- a/storage/innobase/arch/arch0recv.cc
+++ b/storage/innobase/arch/arch0recv.cc
@@ -411,11 +411,13 @@ dberr_t Arch_Page_Sys::Recovery::recover() {
     err = group->recover(group_info, &m_dblwr_ctx);
 
     if (err != DB_SUCCESS) {
+      group->disable(LSN_MAX);
       ut::delete_(group);
       break;
     }
 
     if (group_info.m_num_files == 0) {
+      group->disable(LSN_MAX);
       ut::delete_(group);
       continue;
     }
@@ -682,8 +684,6 @@ dberr_t Arch_File_Ctx::Recovery::parse_reset_points(
   if (file_index != block_num) {
     /* This means there was no reset for this file and hence the
     reset block was not flushed. */
-
-    ut_ad(ut::is_zeros(buf, ARCH_PAGE_BLK_SIZE));
     info.m_reset_pos.init();
     info.m_reset_pos.m_block_num = file_index;
     return err;
@@ -702,20 +702,24 @@ dberr_t Arch_File_Ctx::Recovery::parse_reset_points(
   reset_file.m_file_index = file_index;
 
   if (data_len != 0) {
+    if (data_len < ARCH_PAGE_FILE_HEADER_RESET_LSN_SIZE +
+                       ARCH_PAGE_FILE_HEADER_RESET_POS_SIZE) {
+      return DB_CORRUPTION;
+    }
+
     uint length = 0;
     byte *buf1 = buf + ARCH_PAGE_BLK_HEADER_LENGTH;
 
-    ut_ad(data_len >= ARCH_PAGE_FILE_HEADER_RESET_LSN_SIZE +
-                          ARCH_PAGE_FILE_HEADER_RESET_POS_SIZE);
-
     reset_file.m_lsn = mach_read_from_8(buf1);
     length += ARCH_PAGE_FILE_HEADER_RESET_LSN_SIZE;
 
     Arch_Point start_point;
     Arch_Page_Pos pos;
 
-    while (length != data_len) {
-      ut_ad((data_len - length) % ARCH_PAGE_FILE_HEADER_RESET_POS_SIZE == 0);
+    while (length < data_len) {
+      if ((data_len - length) < ARCH_PAGE_FILE_HEADER_RESET_POS_SIZE) {
+        return DB_CORRUPTION;
+      }
 
       pos.m_block_num = mach_read_from_2(buf1 + length);
       length += ARCH_PAGE_FILE_HEADER_RESET_BLOCK_NUM_SIZE;
@@ -724,6 +728,11 @@ dberr_t Arch_File_Ctx::Recovery::parse_reset_points(
       length += ARCH_PAGE_FILE_HEADER_RESET_BLOCK_OFFSET_SIZE;
 
       start_point.lsn = m_file_ctx.fetch_reset_lsn(pos.m_block_num);
+
+      if (start_point.lsn == LSN_MAX) {
+        return DB_CORRUPTION;
+      }
+
       start_point.pos = pos;
 
       reset_file.m_start_point.push_back(start_point);
@@ -739,23 +748,20 @@ dberr_t Arch_File_Ctx::Recovery::parse_reset_points(
 
 lsn_t Arch_File_Ctx::fetch_reset_lsn(uint64_t block_num) {
   ut_ad(!is_closed());
-  ut_ad(Arch_Block::get_file_index(block_num, ARCH_DATA_BLOCK) == m_index);
+
+  if (Arch_Block::get_file_index(block_num, ARCH_DATA_BLOCK) != m_index) {
+    return (LSN_MAX);
+  }
 
   byte buf[ARCH_PAGE_BLK_SIZE];
 
   auto offset = Arch_Block::get_file_offset(block_num, ARCH_DATA_BLOCK);
 
-  ut_ad(offset + ARCH_PAGE_BLK_SIZE <= get_phy_size());
-
   auto err = read(buf, offset, ARCH_PAGE_BLK_HEADER_LENGTH);
 
   if (err != DB_SUCCESS) {
     return (LSN_MAX);
   }
 
-  auto lsn = Arch_Block::get_reset_lsn(buf);
-
-  ut_ad(lsn != LSN_MAX);
-
-  return (lsn);
+  return Arch_Block::get_reset_lsn(buf);
 }