From 19ad8bc2ed4849d34c7cf1272fddeb93d22d0c6b Mon Sep 17 00:00:00 2001 From: Julian van der Horst Date: Wed, 26 Mar 2025 12:06:10 +0100 Subject: [PATCH 1/4] Added encoding and decoding with padding --- .gitignore | 2 +- src/lib/high_level/data_types.rs | 110 +++++++++++ tests/padding.rs | 325 +++++++++++++++++++++++++++++++ 3 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 tests/padding.rs diff --git a/.gitignore b/.gitignore index a9cc0cd..52e9834 100644 --- a/.gitignore +++ b/.gitignore @@ -154,4 +154,4 @@ dist .idea/ # OS specific -.DS_Store/ \ No newline at end of file +.DS_Store diff --git a/src/lib/high_level/data_types.rs b/src/lib/high_level/data_types.rs index 26790ea..29c6ad8 100644 --- a/src/lib/high_level/data_types.rs +++ b/src/lib/high_level/data_types.rs @@ -6,6 +6,7 @@ use crate::low_level::elgamal::{ElGamal, ELGAMAL_LENGTH}; use derive_more::{Deref, From}; use rand_core::{CryptoRng, RngCore}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::io::{Error, ErrorKind}; /// A pseudonym (in the background, this is a [`GroupElement`]) that can be used to identify a user /// within a specific context, which can be encrypted, rekeyed and reshuffled. 
@@ -196,6 +197,115 @@ pub trait Encryptable { fn as_bytes(&self) -> Option<[u8; 16]> { self.value().encode_lizard() } + + /// Encodes an arbitrary byte array into one or more Encryptables + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + fn from_bytes_padded(data: &[u8]) -> Vec + where + Self: Sized, + { + if data.is_empty() { + return vec![]; + } + + let mut result = Vec::new(); + + // Process all full blocks, that do not need padding + // Initialize the last block with the padding value + // Copy remaining data if there is any + for i in 0..(data.len() / 16) { + let start = i * 16; + // This is safe, as we know that the slice is 16 bytes long + result.push(Self::from_bytes( + &data[start..start + 16].try_into().unwrap(), + )); + } + + let remaining = data.len() % 16; + let padding_byte = (16 - remaining) as u8; + + let mut last_block = [padding_byte; 16]; + + if remaining > 0 { + last_block[..remaining].copy_from_slice(&data[data.len() - remaining..]); + } + + result.push(Self::from_bytes(&last_block)); + + result + } + + /// Encodes an arbitrary string into one or more Encrtypb + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + fn from_string_padded(text: &str) -> Vec + where + Self: Sized, + { + // Convert string to bytes and pass to the byte encoding function + Self::from_bytes_padded(text.as_bytes()) + } + + /// Decodes encryptables back to the original string + /// Returns an error if the decoded bytes are not valid UTF-8 + fn to_string_padded(encryptables: &[Self]) -> Result + where + Self: Sized, + { + let bytes = Self::to_bytes_padded(encryptables)?; + String::from_utf8(bytes).map_err(|e| Error::new(ErrorKind::InvalidData, e.to_string())) + } + + /// Decodes encryptables back to the original byte array + fn to_bytes_padded(encryptables: &[Self]) -> Result, Error> + where + Self: Sized, + { + if encryptables.is_empty() { + return Err(Error::new( + 
ErrorKind::InvalidInput, + "No encryptables provided", + )); + } + + let mut result = Vec::with_capacity(encryptables.len() * 16); + + // Decoding happens in two steps: + // 1. copy over all blocks except the last one verbatim + // 2. validate the padding of the last block and copy only its data part + for data_point in &encryptables[..encryptables.len() - 1] { + let block = data_point.as_bytes().ok_or(Error::new( + ErrorKind::InvalidData, + "Encryptable conversion to bytes failed", + ))?; + result.extend_from_slice(&block); + } + + // This is safe, we know that there is at least one element in the slice + let last_block = encryptables.last().unwrap().as_bytes().ok_or(Error::new( + ErrorKind::InvalidData, + "Last encryptables conversion to bytes failed", + ))?; + + let padding_byte = last_block[15]; + + if padding_byte == 0 || padding_byte > 16 { + return Err(Error::new(ErrorKind::InvalidData, "Invalid padding")); + } + + if last_block[16 - padding_byte as usize..] + .iter() + .any(|&b| b != padding_byte) + { + return Err(Error::new(ErrorKind::InvalidData, "Inconsistent padding")); + } + + // Add the data part of the last block + let data_bytes = 16 - padding_byte as usize; + result.extend_from_slice(&last_block[..data_bytes]); + + Ok(result) + } + /// Create multiple messages from a byte array. /// TODO: remove this method, as it cannot handle data that is not a multiple of 16 bytes and padding should generally not belong in this library. 
#[deprecated] diff --git a/tests/padding.rs b/tests/padding.rs new file mode 100644 index 0000000..342975e --- /dev/null +++ b/tests/padding.rs @@ -0,0 +1,325 @@ +use libpep::high_level::contexts::{ + EncryptionContext, PseudonymizationDomain, PseudonymizationInfo, +}; +use libpep::high_level::data_types::{DataPoint, Encryptable, EncryptedPseudonym, Pseudonym}; +use libpep::high_level::keys::{ + make_global_keys, make_session_keys, EncryptionSecret, PseudonymizationSecret, +}; +use libpep::high_level::ops::{decrypt, encrypt, pseudonymize}; +use std::io::{Error, ErrorKind}; + +#[test] +fn test_from_bytes_padded_empty() { + let data: &[u8] = &[]; + let result = DataPoint::from_bytes_padded(data); + assert!(result.is_empty()); +} + +#[test] +fn test_from_bytes_padded_single_block() { + // Test with less than 16 bytes + let data = b"Hello, world!"; + let result = DataPoint::from_bytes_padded(data); + + assert_eq!(1, result.len()); + + // The padding should be 3 bytes of value 3 + let bytes = result[0].as_bytes().unwrap(); + assert_eq!(b"Hello, world!\x03\x03\x03", &bytes); +} + +#[test] +fn test_from_bytes_padded_exact_block() { + // Test with exactly 16 bytes + let data = b"0123456789ABCDEF"; + let result = DataPoint::from_bytes_padded(data); + + // Should have 2 blocks: the 16 bytes of data and one full block of padding + assert_eq!(2, result.len()); + + // First block should be exactly our input + assert_eq!(b"0123456789ABCDEF", &result[0].as_bytes().unwrap()); + + // Second block should be all padding bytes with value 16 + let expected_padding = [16u8; 16]; + assert_eq!(expected_padding, result[1].as_bytes().unwrap()); +} + +#[test] +fn test_from_bytes_padded_multiple_blocks() { + // Test with more than 16 bytes + let data = b"This is a longer string that spans multiple blocks"; + let result = DataPoint::from_bytes_padded(data); + + // Calculate expected number of blocks (50 bytes -> 3 full blocks + 1 padded block) + let expected_blocks = (data.len() / 16) + 1; + 
assert_eq!(expected_blocks, result.len()); + + // Check the content of each full block + for i in 0..(data.len() / 16) { + let start = i * 16; + let expected = &data[start..start + 16]; + assert_eq!(expected, &result[i].as_bytes().unwrap()[..16]); + } + + // Check the last block's padding + let last_block = result.last().unwrap().as_bytes().unwrap(); + let remaining = data.len() % 16; + let padding_byte = (16 - remaining) as u8; + + // Verify data portion + assert_eq!(&data[data.len() - remaining..], &last_block[..remaining]); + + // Verify padding portion + for i in remaining..16 { + assert_eq!(padding_byte, last_block[i]); + } +} + +#[test] +fn test_to_bytes_padded() -> Result<(), Error> { + // Create some test data + let original = b"This is some test data for padding"; + + // Encode it + let data_points = DataPoint::from_bytes_padded(original); + + // Decode it + let decoded = DataPoint::to_bytes_padded(&data_points)?; + + // Verify it matches the original + assert_eq!(original, decoded.as_slice()); + + Ok(()) +} + +#[test] +fn test_to_bytes_padded_empty() { + // Test with empty vec + let data_points = vec![]; + let result = DataPoint::to_bytes_padded(&data_points); + + // Should be an error + assert!(result.is_err()); + match result { + Err(e) => { + assert_eq!(ErrorKind::InvalidInput, e.kind()); + } + _ => panic!("Expected an error"), + } +} + +#[test] +fn test_to_bytes_padded_invalid_padding() { + // Create a DataPoint with invalid padding (padding byte = 0) + let invalid_block = [0u8; 16]; + let data_point = DataPoint::from_bytes(&invalid_block); + + // Attempt to decode + let result = DataPoint::to_bytes_padded(&[data_point]); + + // Should be an error + assert!(result.is_err()); + match result { + Err(e) => { + assert_eq!(ErrorKind::InvalidData, e.kind()); + } + _ => panic!("Expected an error"), + } + + // Try with inconsistent padding (some bytes have different values) + let mut inconsistent_block = [5u8; 16]; // padding of 5 + inconsistent_block[15] = 
6; // but one byte is wrong + let data_point = DataPoint::from_bytes(&inconsistent_block); + + // Attempt to decode + let result = DataPoint::to_bytes_padded(&[data_point]); + + // Should be an error + assert!(result.is_err()); +} + +#[test] +fn test_to_string_padded() -> Result<(), Error> { + // Test string + let original = "This is a UTF-8 string with special chars: ñáéíóú 你好"; + + // Encode it + let data_points = DataPoint::from_string_padded(original); + + // Decode it + let decoded = DataPoint::to_string_padded(&data_points)?; + + // Verify it matches the original + assert_eq!(original, decoded); + + Ok(()) +} + +#[test] +fn test_to_string_padded_invalid_utf8() { + // Create data points with non-UTF8 data + let invalid_utf8 = vec![0xFF, 0xFE, 0xFD]; // Invalid UTF-8 sequence + let mut block = [0u8; 16]; + block[..3].copy_from_slice(&invalid_utf8); + block[3..].fill(13); // Padding + + let data_point = DataPoint::from_bytes(&block); + + // Attempt to decode to string + let result = DataPoint::to_string_padded(&[data_point]); + + // Should be an error + assert!(result.is_err()); +} + +#[test] +fn test_roundtrip_all_padding_sizes() -> Result<(), Error> { + // Test all possible padding sizes (1-16) + for padding_size in 1..=16 { + let size = 32 - padding_size; // 32 is arbitrary, just want multiple blocks + let data = vec![b'X'; size]; + + // Encode + let data_points = DataPoint::from_bytes_padded(&data); + + // Decode + let decoded = DataPoint::to_bytes_padded(&data_points)?; + + // Verify + assert_eq!(data, decoded); + } + + Ok(()) +} + +#[test] +fn test_pseudonym_from_bytes_padded() { + // Test with less than 16 bytes + let data = b"Hello, world!"; + let result = Pseudonym::from_bytes_padded(data); + + assert_eq!(1, result.len()); + + // The padding should be 3 bytes of value 3 + let bytes = result[0].as_bytes().unwrap(); + assert_eq!(b"Hello, world!\x03\x03\x03", &bytes); +} + +#[test] +fn test_pseudonym_to_bytes_padded() -> Result<(), Error> { + // Create 
some test data + let original = b"This is some test data for padding"; + + // Encode it + let pseudonyms = Pseudonym::from_bytes_padded(original); + + // Decode it + let decoded = Pseudonym::to_bytes_padded(&pseudonyms)?; + + // Verify it matches the original + assert_eq!(original, decoded.as_slice()); + + Ok(()) +} + +#[test] +fn test_pseudonym_string_roundtrip() -> Result<(), Error> { + // Test string + let original = "Testing pseudonym string conversion"; + + // Encode it + let pseudonyms = Pseudonym::from_string_padded(original); + + // Decode it + let decoded = Pseudonym::to_string_padded(&pseudonyms)?; + + // Verify it matches the original + assert_eq!(original, decoded); + + Ok(()) +} + +#[test] +fn test_pseudonymize_string_roundtrip() -> Result<(), Error> { + // Initialize test environment + let mut rng = rand::thread_rng(); + let (_global_public, global_secret) = make_global_keys(&mut rng); + let pseudo_secret = PseudonymizationSecret::from("test-secret".as_bytes().to_vec()); + let enc_secret = EncryptionSecret::from("enc-secret".as_bytes().to_vec()); + + // Setup domains and contexts + let domain_a = PseudonymizationDomain::from("domain-a"); + let domain_b = PseudonymizationDomain::from("domain-b"); + let session = EncryptionContext::from("session-1"); + + // Create session keys + let (session_public, session_secret) = make_session_keys(&global_secret, &session, &enc_secret); + + // Original string to encrypt and pseudonymize + let original_string = "This is a very long id that will be pseudonymized"; + + // Step 1: Convert string to padded pseudonyms + let pseudonyms = Pseudonym::from_string_padded(original_string); + + // Step 2: Encrypt the pseudonyms + let encrypted_pseudonyms: Vec = pseudonyms + .iter() + .map(|p| encrypt(p, &session_public, &mut rng)) + .collect(); + + // Step 3: Create pseudonymization info for transform + let pseudo_info = PseudonymizationInfo::new( + &domain_a, + &domain_b, + Some(&session), + Some(&session), + &pseudo_secret, + 
&enc_secret, + ); + + // Step 4: Pseudonymize (transform) the encrypted pseudonyms + let transformed_pseudonyms: Vec = encrypted_pseudonyms + .iter() + .map(|ep| pseudonymize(ep, &pseudo_info)) + .collect(); + + // Step 5: Decrypt the transformed pseudonyms + let decrypted_pseudonyms: Vec = transformed_pseudonyms + .iter() + .map(|ep| decrypt(ep, &session_secret)) + .collect(); + + // Step 6: Encrypt the decrypted pseudonyms + let re_encrypted_pseudonyms: Vec = decrypted_pseudonyms + .iter() + .map(|p| encrypt(p, &session_public, &mut rng)) + .collect(); + + // Step 7: Reverse the pseudonymization + let reverse_pseudo_info = PseudonymizationInfo::new( + &domain_b, + &domain_a, + Some(&session), + Some(&session), + &pseudo_secret, + &enc_secret, + ); + + let reverse_transformed: Vec = re_encrypted_pseudonyms + .iter() + .map(|ep| pseudonymize(ep, &reverse_pseudo_info)) + .collect(); + + let reverse_decrypted: Vec = reverse_transformed + .iter() + .map(|ep| decrypt(ep, &session_secret)) + .collect(); + + let reverse_string = Pseudonym::to_string_padded(&reverse_decrypted)?; + + // After reversing the pseudonymization, we should get back the original string + assert_eq!(original_string, reverse_string); + + Ok(()) +} From 2d07fe6e100bba95e52277cc38750c5c11d8067b Mon Sep 17 00:00:00 2001 From: Julian van der Horst Date: Wed, 26 Mar 2025 12:52:01 +0100 Subject: [PATCH 2/4] Added encoding and decoding with padding --- tests/padding.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/padding.rs b/tests/padding.rs index 342975e..bea2d09 100644 --- a/tests/padding.rs +++ b/tests/padding.rs @@ -56,10 +56,10 @@ fn test_from_bytes_padded_multiple_blocks() { assert_eq!(expected_blocks, result.len()); // Check the content of each full block - for i in 0..(data.len() / 16) { + for (i, block) in result.iter().enumerate().take(data.len() / 16) { let start = i * 16; - let expected = &data[start..start + 16]; - assert_eq!(expected, 
&result[i].as_bytes().unwrap()[..16]); + let expected = data[start..start + 16].to_vec(); + assert_eq!(expected, block.as_bytes().unwrap()[..16]); } // Check the last block's padding @@ -71,8 +71,8 @@ fn test_from_bytes_padded_multiple_blocks() { assert_eq!(&data[data.len() - remaining..], &last_block[..remaining]); // Verify padding portion - for i in remaining..16 { - assert_eq!(padding_byte, last_block[i]); + for byte in last_block.iter().skip(remaining) { + assert_eq!(&padding_byte, byte); } } From cb6bfc87a749215a93f4efb12214dceb54bf91e3 Mon Sep 17 00:00:00 2001 From: Julian van der Horst Date: Wed, 26 Mar 2025 13:04:07 +0100 Subject: [PATCH 3/4] Added WASM support --- src/lib/wasm/high_level.rs | 72 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/lib/wasm/high_level.rs b/src/lib/wasm/high_level.rs index 9e0cd2c..a362c41 100644 --- a/src/lib/wasm/high_level.rs +++ b/src/lib/wasm/high_level.rs @@ -167,6 +167,42 @@ impl WASMPseudonym { pub fn as_bytes(&self) -> Option> { self.0.as_bytes().map(|x| x.to_vec()) } + + /// Create a collection of pseudonyms from an arbitrary-length string + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + #[wasm_bindgen(js_name = fromStringPadded)] + pub fn from_string_padded(text: &str) -> Vec { + Pseudonym::from_string_padded(text) + .into_iter() + .map(WASMPseudonym::from) + .collect() + } + + /// Create a collection of pseudonyms from an arbitrary-length byte array + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + #[wasm_bindgen(js_name = fromBytesPadded)] + pub fn from_bytes_padded(data: Vec) -> Vec { + Pseudonym::from_bytes_padded(&data) + .into_iter() + .map(WASMPseudonym::from) + .collect() + } + + /// Convert a collection of pseudonyms back to the original string + /// Returns undefined if the decoding fails (e.g., invalid padding or UTF-8) + #[wasm_bindgen(js_name = toStringPadded)] + pub fn 
to_string_padded(pseudonyms: Vec) -> Option { + let rust_pseudonyms: Vec = pseudonyms.into_iter().map(|p| p.0).collect(); + Pseudonym::to_string_padded(&rust_pseudonyms).ok() + } + + /// Convert a collection of pseudonyms back to the original byte array + /// Returns undefined if the decoding fails (e.g., invalid padding) + #[wasm_bindgen(js_name = toBytesPadded)] + pub fn to_bytes_padded(pseudonyms: Vec) -> Option> { + let rust_pseudonyms: Vec = pseudonyms.into_iter().map(|p| p.0).collect(); + Pseudonym::to_bytes_padded(&rust_pseudonyms).ok() + } } #[wasm_bindgen(js_class = "DataPoint")] @@ -232,6 +268,42 @@ impl WASMDataPoint { pub fn as_bytes(&self) -> Option> { self.0.as_bytes().map(|x| x.to_vec()) } + + /// Create a collection of data points from an arbitrary-length string + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + #[wasm_bindgen(js_name = fromStringPadded)] + pub fn from_string_padded(text: &str) -> Vec { + DataPoint::from_string_padded(text) + .into_iter() + .map(WASMDataPoint::from) + .collect() + } + + /// Create a collection of data points from an arbitrary-length byte array + /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes + #[wasm_bindgen(js_name = fromBytesPadded)] + pub fn from_bytes_padded(data: Vec) -> Vec { + DataPoint::from_bytes_padded(&data) + .into_iter() + .map(WASMDataPoint::from) + .collect() + } + + /// Convert a collection of data points back to the original string + /// Returns undefined if the decoding fails (e.g., invalid padding or UTF-8) + #[wasm_bindgen(js_name = toStringPadded)] + pub fn to_string_padded(data_points: Vec) -> Option { + let rust_data_points: Vec = data_points.into_iter().map(|p| p.0).collect(); + DataPoint::to_string_padded(&rust_data_points).ok() + } + + /// Convert a collection of data points back to the original byte array + /// Returns undefined if the decoding fails (e.g., invalid padding) + #[wasm_bindgen(js_name = 
toBytesPadded)] + pub fn to_bytes_padded(data_points: Vec) -> Option> { + let rust_data_points: Vec = data_points.into_iter().map(|p| p.0).collect(); + DataPoint::to_bytes_padded(&rust_data_points).ok() + } } #[wasm_bindgen(js_class = "EncryptedPseudonym")] From a812580bfc805151c7613110710793721f8d5ec3 Mon Sep 17 00:00:00 2001 From: Julian van der Horst Date: Wed, 26 Mar 2025 14:13:24 +0100 Subject: [PATCH 4/4] Fixed typo --- src/lib/high_level/data_types.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/high_level/data_types.rs b/src/lib/high_level/data_types.rs index 29c6ad8..468ea5f 100644 --- a/src/lib/high_level/data_types.rs +++ b/src/lib/high_level/data_types.rs @@ -198,7 +198,7 @@ pub trait Encryptable { self.value().encode_lizard() } - /// Encodes an arbitrary byte array into one or more Encryptables + /// Encodes an arbitrary byte array into one or more encryptables /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes fn from_bytes_padded(data: &[u8]) -> Vec where @@ -235,7 +235,7 @@ pub trait Encryptable { result } - /// Encodes an arbitrary string into one or more Encrtypb + /// Encodes an arbitrary string into one or more encryptables /// Uses PKCS#7 style padding where the padding byte value equals the number of padding bytes fn from_string_padded(text: &str) -> Vec where