From 857b308912c5bcb44c4dca3188ccfe34cb3bdfb9 Mon Sep 17 00:00:00 2001 From: Haydn Trigg Date: Fri, 16 Jan 2026 21:31:49 +1030 Subject: [PATCH 1/2] Better COFF and Big Endian Strings --- objdiff-core/src/arch/mod.rs | 15 +++++++++++---- objdiff-core/src/arch/ppc/mod.rs | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 592ed209..44344f69 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -175,19 +175,26 @@ impl DataType { } DataType::String => { if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') { - let str_bytes = &bytes[..nul_idx]; + let ascii_str_bytes = &bytes[..nul_idx]; // Special case to display (ASCII) as the label for ASCII-only strings. - let (cow, _, had_errors) = encoding_rs::UTF_8.decode(str_bytes); + let (cow, _, had_errors) = encoding_rs::UTF_8.decode(ascii_str_bytes); if !had_errors && cow.is_ascii() { let string = format!("{cow}"); let copy_string = escape_special_ascii_characters(string.clone()); strs.push((string, Some("ASCII".into()), Some(copy_string))); } + for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - let (cow, _, had_errors) = encoding.decode(str_bytes); + let (cow, _, had_errors) = encoding.decode(&bytes); // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { - let string = format!("{cow}"); + let mut string = format!("{cow}"); + + // Inline loop to strip all trailing "\0" + while let Some(stripped) = string.strip_suffix('\0') { + string = stripped.to_string(); + } + let copy_string = escape_special_ascii_characters(string.clone()); strs.push((string, Some(encoding_name.into()), Some(copy_string))); } diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index c61ea362..b99c972c 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -355,7 +355,7 @@ impl Arch for ArchPpc { } fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option { - if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase")) { + if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG")) { // Pooled string. return Some(DataType::String); } From 5588f27ac82600f29ef11c69ca2ed2bcd1979e86 Mon Sep 17 00:00:00 2001 From: Haydn Trigg Date: Fri, 16 Jan 2026 21:42:25 +1030 Subject: [PATCH 2/2] Cargo Check and Format Fixes --- objdiff-core/src/arch/mod.rs | 2 +- objdiff-core/src/arch/ppc/mod.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 44344f69..184512a2 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -185,7 +185,7 @@ impl DataType { } for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - let (cow, _, had_errors) = encoding.decode(&bytes); + let (cow, _, had_errors) = encoding.decode(bytes); // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { let mut string = format!("{cow}"); diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index b99c972c..ccce28a6 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -355,7 +355,9 @@ impl Arch for ArchPpc { } fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option { - if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG")) { + if resolved.relocation.is_some_and(|r| { + r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG") + }) { // Pooled string. return Some(DataType::String); }