Skip to content

Commit fc32bf6

Browse files
Handle UTF-16 checksum files
1 parent f5602a7 commit fc32bf6

1 file changed

Lines changed: 224 additions & 28 deletions

File tree

crates/multi-pwsh/src/install.rs

Lines changed: 224 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,10 @@ fn download_text_with_retry(http: &Agent, url: &str, retries: usize) -> Result<S
9696
for attempt in 1..=retries {
9797
let result = (|| -> Result<String> {
9898
let response = http.get(url).set("User-Agent", "multi-pwsh").call()?;
99-
Ok(response.into_string()?)
99+
let mut reader = response.into_reader();
100+
let mut bytes = Vec::new();
101+
reader.read_to_end(&mut bytes)?;
102+
decode_checksum_text(&bytes)
100103
})();
101104

102105
match result {
@@ -114,6 +117,85 @@ fn download_text_with_retry(http: &Agent, url: &str, retries: usize) -> Result<S
114117
Err(last_error.unwrap_or_else(|| MultiPwshError::Archive("download failed without detailed error".to_string())))
115118
}
116119

120+
fn decode_checksum_text(bytes: &[u8]) -> Result<String> {
121+
match detect_text_encoding(bytes) {
122+
TextEncoding::Utf8 => String::from_utf8(bytes.to_vec()).map_err(invalid_checksum_encoding),
123+
TextEncoding::Utf8Bom => String::from_utf8(bytes[3..].to_vec()).map_err(invalid_checksum_encoding),
124+
TextEncoding::Utf16Le => decode_utf16_text(&bytes[2..], true),
125+
TextEncoding::Utf16Be => decode_utf16_text(&bytes[2..], false),
126+
TextEncoding::Utf16LeNoBom => decode_utf16_text(bytes, true),
127+
TextEncoding::Utf16BeNoBom => decode_utf16_text(bytes, false),
128+
}
129+
}
130+
131+
fn decode_utf16_text(bytes: &[u8], little_endian: bool) -> Result<String> {
132+
if bytes.len() % 2 != 0 {
133+
return Err(MultiPwshError::Archive(
134+
"invalid checksum file encoding: odd-length utf-16 payload".to_string(),
135+
));
136+
}
137+
138+
let units: Vec<u16> = bytes
139+
.chunks_exact(2)
140+
.map(|chunk| {
141+
if little_endian {
142+
u16::from_le_bytes([chunk[0], chunk[1]])
143+
} else {
144+
u16::from_be_bytes([chunk[0], chunk[1]])
145+
}
146+
})
147+
.collect();
148+
149+
String::from_utf16(&units)
150+
.map_err(|error| MultiPwshError::Archive(format!("invalid checksum file encoding: {}", error)))
151+
}
152+
153+
fn invalid_checksum_encoding(error: std::string::FromUtf8Error) -> MultiPwshError {
154+
MultiPwshError::Archive(format!("invalid checksum file encoding: {}", error))
155+
}
156+
157+
fn detect_text_encoding(bytes: &[u8]) -> TextEncoding {
158+
if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
159+
return TextEncoding::Utf8Bom;
160+
}
161+
162+
if bytes.starts_with(&[0xFF, 0xFE]) {
163+
return TextEncoding::Utf16Le;
164+
}
165+
166+
if bytes.starts_with(&[0xFE, 0xFF]) {
167+
return TextEncoding::Utf16Be;
168+
}
169+
170+
if looks_like_utf16_le(bytes) {
171+
return TextEncoding::Utf16LeNoBom;
172+
}
173+
174+
if looks_like_utf16_be(bytes) {
175+
return TextEncoding::Utf16BeNoBom;
176+
}
177+
178+
TextEncoding::Utf8
179+
}
180+
181+
/// Heuristic for BOM-less little-endian UTF-16: true when there are at least
/// four bytes and the second and fourth bytes are both zero.
fn looks_like_utf16_le(bytes: &[u8]) -> bool {
    matches!(bytes, [_, 0, _, 0, ..])
}
184+
185+
/// Heuristic for BOM-less big-endian UTF-16: true when there are at least
/// four bytes and the first and third bytes are both zero.
fn looks_like_utf16_be(bytes: &[u8]) -> bool {
    matches!(bytes, [0, _, 0, _, ..])
}
188+
189+
/// Text encodings that `detect_text_encoding` can report for a checksum file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextEncoding {
    /// No BOM and no UTF-16 heuristic match; decoded as UTF-8.
    Utf8,
    /// Starts with the UTF-8 byte-order mark `EF BB BF`.
    Utf8Bom,
    /// Starts with the little-endian UTF-16 byte-order mark `FF FE`.
    Utf16Le,
    /// Starts with the big-endian UTF-16 byte-order mark `FE FF`.
    Utf16Be,
    /// No BOM, but the bytes matched the little-endian UTF-16 heuristic.
    Utf16LeNoBom,
    /// No BOM, but the bytes matched the big-endian UTF-16 heuristic.
    Utf16BeNoBom,
}
198+
117199
fn validate_archive_checksum(http: &Agent, release: &ResolvedRelease, archive_path: &Path) -> Result<()> {
118200
let checksums = download_text_with_retry(http, &release.checksum_asset_url, 8)?;
119201
let expected = find_expected_checksum(&checksums, &release.asset_name)?;
@@ -132,33 +214,10 @@ fn validate_archive_checksum(http: &Agent, release: &ResolvedRelease, archive_pa
132214

133215
fn find_expected_checksum(checksums: &str, asset_name: &str) -> Result<String> {
134216
for (index, line) in checksums.lines().enumerate() {
135-
let trimmed = line.trim();
136-
if trimmed.is_empty() {
137-
continue;
138-
}
139-
140-
let checksum = trimmed
141-
.split_ascii_whitespace()
142-
.next()
143-
.ok_or_else(|| MultiPwshError::Archive(format!("malformed checksum line {}", index + 1)))?;
144-
145-
if !is_valid_sha256_hex(checksum) {
146-
return Err(MultiPwshError::Archive(format!(
147-
"invalid sha256 checksum on line {}",
148-
index + 1
149-
)));
150-
}
151-
152-
let file_name = trimmed[checksum.len()..].trim_start().trim_start_matches('*');
153-
if file_name.is_empty() {
154-
return Err(MultiPwshError::Archive(format!(
155-
"missing file name in checksum line {}",
156-
index + 1
157-
)));
158-
}
159-
160-
if file_name == asset_name {
161-
return Ok(checksum.to_ascii_lowercase());
217+
if let Some((checksum, file_name)) = parse_checksum_line(line, index + 1, asset_name)? {
218+
if file_name == asset_name {
219+
return Ok(checksum);
220+
}
162221
}
163222
}
164223

@@ -172,6 +231,81 @@ fn is_valid_sha256_hex(value: &str) -> bool {
172231
value.len() == 64 && value.as_bytes().iter().all(|byte| byte.is_ascii_hexdigit())
173232
}
174233

234+
fn parse_checksum_line<'a>(
235+
line: &'a str,
236+
line_number: usize,
237+
target_asset_name: &str,
238+
) -> Result<Option<(String, &'a str)>> {
239+
let trimmed = line.trim().trim_start_matches('\u{feff}');
240+
if trimmed.is_empty() || trimmed.starts_with('#') {
241+
return Ok(None);
242+
}
243+
244+
if let Some(parsed) = parse_bsd_checksum_line(trimmed, line_number)? {
245+
return Ok(Some(parsed));
246+
}
247+
248+
parse_gnu_checksum_line(trimmed, line_number, target_asset_name)
249+
}
250+
251+
fn parse_bsd_checksum_line<'a>(line: &'a str, line_number: usize) -> Result<Option<(String, &'a str)>> {
252+
let Some((left, right)) = line.split_once('=') else {
253+
return Ok(None);
254+
};
255+
256+
let Some(file_name) = left
257+
.trim()
258+
.strip_prefix("SHA256 (")
259+
.and_then(|value| value.strip_suffix(')'))
260+
else {
261+
return Ok(None);
262+
};
263+
264+
let checksum = right.trim();
265+
if !is_valid_sha256_hex(checksum) {
266+
return Err(MultiPwshError::Archive(format!(
267+
"invalid sha256 checksum on line {}",
268+
line_number
269+
)));
270+
}
271+
272+
Ok(Some((checksum.to_ascii_lowercase(), file_name)))
273+
}
274+
275+
fn parse_gnu_checksum_line<'a>(
276+
line: &'a str,
277+
line_number: usize,
278+
target_asset_name: &str,
279+
) -> Result<Option<(String, &'a str)>> {
280+
let mut parts = line.split_ascii_whitespace();
281+
let Some(checksum) = parts.next() else {
282+
return Ok(None);
283+
};
284+
285+
let remainder = line[checksum.len()..].trim_start();
286+
if remainder.is_empty() {
287+
return Ok(None);
288+
}
289+
290+
let file_name = remainder.trim_start_matches('*').trim();
291+
if file_name.is_empty() {
292+
return Ok(None);
293+
}
294+
295+
if !is_valid_sha256_hex(checksum) {
296+
if file_name == target_asset_name {
297+
return Err(MultiPwshError::Archive(format!(
298+
"invalid sha256 checksum on line {}",
299+
line_number
300+
)));
301+
}
302+
303+
return Ok(None);
304+
}
305+
306+
Ok(Some((checksum.to_ascii_lowercase(), file_name)))
307+
}
308+
175309
fn sha256_file(path: &Path) -> Result<String> {
176310
let mut file = File::open(path)?;
177311
let mut hasher = Sha256::new();
@@ -360,6 +494,35 @@ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb *other.zip",
360494
);
361495
}
362496

497+
/// A BSD-style `SHA256 (<file>) = <digest>` entry resolves to its digest.
#[test]
fn find_expected_checksum_accepts_bsd_format() {
    let asset = "PowerShell-7.4.13-win-x64.zip";
    let listing =
        "SHA256 (PowerShell-7.4.13-win-x64.zip) = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

    let resolved = find_expected_checksum(listing, asset).unwrap();

    assert_eq!(
        resolved,
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    );
}
510+
511+
/// A free-text heading before the entries does not prevent the lookup.
#[test]
fn find_expected_checksum_ignores_unrelated_non_checksum_lines() {
    let asset = "PowerShell-7.4.13-win-x64.zip";
    let listing = concat!(
        "Checksums for release assets\n",
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa PowerShell-7.4.13-win-x64.zip",
    );

    let resolved = find_expected_checksum(listing, asset).unwrap();

    assert_eq!(
        resolved,
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    );
}
525+
363526
#[test]
364527
fn find_expected_checksum_rejects_invalid_lines() {
365528
let error = find_expected_checksum(
@@ -384,6 +547,39 @@ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb *other.zip",
384547
.contains("checksum entry for 'PowerShell-7.4.13-win-x64.zip' not found"));
385548
}
386549

550+
/// UTF-16 LE bytes prefixed with a `FF FE` BOM decode back to the original text.
#[test]
fn decode_checksum_text_accepts_utf16le_bom() {
    let content =
        "73601859461b130ee1e6624f0683000a794cbe86db0f4ff9f2ce2a7d4f5f6a01 *powershell-7.4.13-1.cm.aarch64.rpm\n";

    // BOM followed by every code unit in little-endian byte order.
    let mut bytes = vec![0xFF, 0xFE];
    bytes.extend(content.encode_utf16().flat_map(|unit| unit.to_le_bytes()));

    let decoded = decode_checksum_text(&bytes).unwrap();

    assert_eq!(decoded, content);
}
563+
564+
/// End to end: a UTF-16 LE listing with BOM is decoded and the entry for the
/// requested asset is found among several.
#[test]
fn find_expected_checksum_accepts_real_powershell_utf16le_line() {
    let content = concat!(
        "0aa943342ddd5ff5cd5bbb964e6594b7af3e10758ff59874cd26420bebb3c755 *PowerShell-7.4.13-win-arm64.exe\n",
        "1820febe6f9567c8bab21be601dacb902777c1185e1beb81843c3a6f902d6b9d *PowerShell-7.4.13-win-arm64.zip\n",
    );

    // BOM followed by every code unit in little-endian byte order.
    let mut bytes = vec![0xFF, 0xFE];
    bytes.extend(content.encode_utf16().flat_map(|unit| unit.to_le_bytes()));

    let decoded = decode_checksum_text(&bytes).unwrap();
    let resolved = find_expected_checksum(&decoded, "PowerShell-7.4.13-win-arm64.zip").unwrap();

    assert_eq!(
        resolved,
        "1820febe6f9567c8bab21be601dacb902777c1185e1beb81843c3a6f902d6b9d"
    );
}
582+
387583
#[test]
388584
fn sha256_file_hashes_file_contents() {
389585
let temp_dir = tempfile::tempdir().unwrap();

0 commit comments

Comments
 (0)