From 7b5f917c41cdb283017d7a4c9d488f32d0128ea2 Mon Sep 17 00:00:00 2001 From: Joaquin Hui Gomez Date: Tue, 7 Apr 2026 21:34:44 +0100 Subject: [PATCH] Lossily coerce invalid UTF-8 in sqlite collation callback The safe Fn(&str, &str) -> Ordering signature exposed by SqliteConnectOptions::collation() and LockedSqliteHandle::create_collation() was backed by from_utf8_unchecked, so a database containing invalid UTF-8 text could reach the user callback and materialize &str values that violate Rust's UTF-8 invariant inside a safe API. SQLite explicitly documents that invalid UTF-8 may be passed into application-defined collating sequences, so the FFI shim must not assume well-formed bytes. Replace from_utf8_unchecked with String::from_utf8_lossy, which matches the sqlite3_create_collation_v2 SQLITE_UTF8 flag and keeps the safe signature sound without changing correct-UTF-8 behavior. Fixes #4194 --- sqlx-sqlite/src/connection/collation.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sqlx-sqlite/src/connection/collation.rs b/sqlx-sqlite/src/connection/collation.rs index e7422138bc..229e780adf 100644 --- a/sqlx-sqlite/src/connection/collation.rs +++ b/sqlx-sqlite/src/connection/collation.rs @@ -3,7 +3,6 @@ use std::ffi::CString; use std::fmt::{self, Debug, Formatter}; use std::os::raw::{c_int, c_void}; use std::slice; -use std::str::from_utf8_unchecked; use std::sync::Arc; use libsqlite3_sys::{sqlite3_create_collation_v2, SQLITE_OK, SQLITE_UTF8}; @@ -137,15 +136,19 @@ where let right_len = usize::try_from(right_len) .unwrap_or_else(|_| panic!("right_len out of range: {right_len}")); + // SQLite explicitly documents that invalid UTF-8 may be passed into + // application-defined collating sequences. The safe `Fn(&str, &str)` + // signature exposed to users must never observe invalid UTF-8, so + // lossily coerce the raw bytes here. let s1 = { let c_slice = slice::from_raw_parts(left_ptr as *const u8, left_len); - from_utf8_unchecked(c_slice) + String::from_utf8_lossy(c_slice) }; let s2 = { let c_slice = slice::from_raw_parts(right_ptr as *const u8, right_len); - from_utf8_unchecked(c_slice) + String::from_utf8_lossy(c_slice) }; - let t = (*boxed_f)(s1, s2); + let t = (*boxed_f)(&s1, &s2); match t { Ordering::Less => -1,