-
Notifications
You must be signed in to change notification settings - Fork 30
Add checksum verification feature #72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
912b82a
c3d2635
6212344
4551534
bcae8b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,15 +17,17 @@ | |
| use std::os::unix::fs::{chown, MetadataExt}; | ||
| use std::{cmp, thread}; | ||
| use std::fs::{self, canonicalize, create_dir_all, read_link, File, Metadata}; | ||
| use std::io::Read; | ||
| use std::path::{Path, PathBuf}; | ||
| use std::sync::Arc; | ||
| use std::sync::{Arc, Mutex}; | ||
|
|
||
| use crossbeam_channel as cbc; | ||
| use libfs::{ | ||
| allocate_file, copy_file_bytes, copy_owner, copy_permissions, copy_timestamps, next_sparse_segments, probably_sparse, reflink, sync, FileType | ||
| }; | ||
| use log::{debug, error, info, warn}; | ||
| use walkdir::WalkDir; | ||
| use xxhash_rust::xxh3::Xxh3; | ||
|
|
||
| use crate::backup::{get_backup_path, needs_backup}; | ||
| use crate::config::{Config, Reflink}; | ||
|
|
@@ -39,6 +41,8 @@ pub struct CopyHandle { | |
| pub outfd: File, | ||
| pub metadata: Metadata, | ||
| pub config: Arc<Config>, | ||
| pub to: PathBuf, | ||
| src_checksum: Mutex<Option<u64>>, | ||
| } | ||
|
|
||
| impl CopyHandle { | ||
|
|
@@ -60,6 +64,8 @@ impl CopyHandle { | |
| outfd, | ||
| metadata, | ||
| config: config.clone(), | ||
| to: to.to_path_buf(), | ||
| src_checksum: Mutex::new(None), | ||
| }; | ||
|
|
||
| Ok(handle) | ||
|
|
@@ -68,13 +74,27 @@ impl CopyHandle { | |
| /// Copy len bytes from wherever the descriptor cursors are set. | ||
| fn copy_bytes(&self, len: u64, updates: &Arc<dyn StatusUpdater>) -> Result<u64> { | ||
| let mut written = 0; | ||
| let mut hasher = if self.config.verify_checksum { | ||
| Some(Xxh3::new()) | ||
| } else { | ||
| None | ||
| }; | ||
|
|
||
| while written < len { | ||
| let bytes_to_copy = cmp::min(len - written, self.config.block_size); | ||
| let bytes = copy_file_bytes(&self.infd, &self.outfd, bytes_to_copy)? as u64; | ||
| let bytes = if let Some(ref mut h) = hasher { | ||
| copy_file_bytes_with_hash(&self.infd, &self.outfd, bytes_to_copy, h)? | ||
| } else { | ||
| copy_file_bytes(&self.infd, &self.outfd, bytes_to_copy)? as u64 | ||
| }; | ||
| written += bytes; | ||
| updates.send(StatusUpdate::Copied(bytes))?; | ||
| } | ||
|
|
||
| if let Some(h) = hasher { | ||
| *self.src_checksum.lock().unwrap() = Some(h.digest()); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The result of `lock()` shouldn't be unwrapped here; a poisoned mutex can be mapped to an error type and returned instead of panicking. |
||
| } | ||
|
|
||
| Ok(written) | ||
| } | ||
|
|
||
|
|
@@ -119,7 +139,9 @@ impl CopyHandle { | |
| if self.try_reflink()? { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This setting will be silently ignored on a reflink-capable filesystem (btrfs, XFS): when `try_reflink()` succeeds the function returns before any bytes are hashed, so no checksum is ever recorded or verified. Is this deliberate? |
||
| return Ok(self.metadata.len()); | ||
| } | ||
| let total = if probably_sparse(&self.infd)? { | ||
| // Disable sparse file optimization when checksum verification is enabled | ||
| // to ensure consistent hashing of all file content including holes | ||
| let total = if !self.config.verify_checksum && probably_sparse(&self.infd)? { | ||
| self.copy_sparse(updates)? | ||
| } else { | ||
| self.copy_bytes(self.metadata.len(), updates)? | ||
|
|
@@ -138,10 +160,27 @@ impl CopyHandle { | |
| if self.config.ownership && copy_owner(&self.infd, &self.outfd).is_err() { | ||
| warn!("Failed to copy file ownership: {:?}", self.infd); | ||
| } | ||
|
|
||
| if self.config.fsync { | ||
| debug!("Syncing file {:?}", self.outfd); | ||
| sync(&self.outfd)?; | ||
| } | ||
|
|
||
| if self.config.verify_checksum { | ||
| if let Some(expected) = *self.src_checksum.lock().unwrap() { | ||
| debug!("Verifying checksum for {:?}", self.to); | ||
| let actual = compute_file_checksum(&self.to)?; | ||
| if expected != actual { | ||
| return Err(XcpError::ChecksumMismatch { | ||
| path: self.to.clone(), | ||
| expected, | ||
| actual, | ||
| }.into()); | ||
| } | ||
| debug!("Checksum verified: {:016x}", expected); | ||
| } | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
|
|
@@ -265,3 +304,48 @@ pub fn tree_walker( | |
/// Reports whether `path` compares equal to a freshly created, empty `PathBuf`.
fn empty_path(path: &Path) -> bool {
    let nothing = PathBuf::new();
    path == nothing.as_path()
}
|
|
||
| fn copy_file_bytes_with_hash(infd: &File, outfd: &File, bytes: u64, hasher: &mut Xxh3) -> Result<u64> { | ||
| use std::io::BufReader; | ||
|
|
||
| const BUFFER_SIZE: usize = 64 * 1024; | ||
| let mut reader = BufReader::with_capacity(BUFFER_SIZE, infd); | ||
| let mut writer = std::io::BufWriter::with_capacity(BUFFER_SIZE, outfd); | ||
| let mut buffer = vec![0u8; BUFFER_SIZE]; | ||
| let mut total_copied = 0u64; | ||
|
|
||
| while total_copied < bytes { | ||
| let to_read = cmp::min(bytes - total_copied, BUFFER_SIZE as u64) as usize; | ||
| let n = reader.read(&mut buffer[..to_read])?; | ||
| if n == 0 { | ||
| break; | ||
| } | ||
|
|
||
| hasher.update(&buffer[..n]); | ||
| std::io::Write::write_all(&mut writer, &buffer[..n])?; | ||
| total_copied += n as u64; | ||
| } | ||
|
|
||
| std::io::Write::flush(&mut writer)?; | ||
| Ok(total_copied) | ||
| } | ||
|
|
||
| fn compute_file_checksum(path: &Path) -> Result<u64> { | ||
| use std::io::BufReader; | ||
|
|
||
| const BUFFER_SIZE: usize = 64 * 1024; | ||
| let file = File::open(path)?; | ||
| let mut reader = BufReader::with_capacity(BUFFER_SIZE, file); | ||
| let mut hasher = Xxh3::new(); | ||
| let mut buffer = vec![0u8; BUFFER_SIZE]; | ||
|
|
||
| loop { | ||
| let n = reader.read(&mut buffer)?; | ||
| if n == 0 { | ||
| break; | ||
| } | ||
| hasher.update(&buffer[..n]); | ||
| } | ||
|
|
||
| Ok(hasher.digest()) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will also be very slow on NFS mounts: `copy_file_range()` allows the copy to happen server-side, whereas checksumming requires the data to be copied back to the client for verification.