Skip to content
2 changes: 1 addition & 1 deletion c-api/src/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pub unsafe extern "C" fn lol_html_element_get_attribute(
name_len: size_t,
) -> Str {
let element = to_ref!(element);
let name = unwrap_or_ret!(to_str!(name, name_len), Str::from_opt(None));
let name = unwrap_or_ret!(to_str!(name, name_len), Str::EMPTY);

Str::from_opt(element.get_attribute(name))
}
Expand Down
17 changes: 13 additions & 4 deletions c-api/src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
use super::*;
use std::error::Error;

thread_local! {
pub static LAST_ERROR: RefCell<Option<Box<dyn Error>>> = RefCell::new(None);
pub static LAST_ERROR: RefCell<Option<Box<str>>> = const { RefCell::new(None) };
}

#[unsafe(no_mangle)]
pub extern "C" fn lol_html_take_last_error() -> Str {
let err = LAST_ERROR.with(|e| e.borrow_mut().take());
Str::from_opt(
LAST_ERROR
.try_with(|e| e.try_borrow_mut().ok()?.take())
.ok()
.flatten(),
)
}

Str::from_opt(err.map(|e| e.to_string()))
/// Records `err` as the most recent error message for the current thread.
///
/// The message is written into the thread-local `LAST_ERROR` slot, replacing
/// whatever was stored there before. Storing is best-effort: if the
/// thread-local cannot be accessed, or the slot is already borrowed, the
/// message is dropped and the previous value is left untouched.
#[cold]
#[inline(never)]
pub(crate) fn save_last_error(err: String) {
    let message = err.into_boxed_str();
    // `try_with` fails instead of panicking when the thread-local is not
    // accessible; that failure is deliberately ignored.
    let _ = crate::errors::LAST_ERROR.try_with(|slot| {
        // `try_borrow_mut` fails instead of panicking if the slot is in use.
        if let Ok(mut current) = slot.try_borrow_mut() {
            *current = Some(message);
        }
    });
}

#[derive(Error, Debug, Eq, PartialEq, Copy, Clone)]
Expand Down
22 changes: 21 additions & 1 deletion c-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ macro_rules! unwrap_or_ret {
match $expr {
Ok(v) => v,
Err(err) => {
crate::errors::LAST_ERROR.with(|e| *e.borrow_mut() = Some(err.into()));
crate::errors::save_last_error(err.to_string());
return $ret_val;
}
}
Expand All @@ -74,6 +74,26 @@ macro_rules! unwrap_or_ret_null {
};
}

/// Turns a panic payload into a boxed error carrying the panic message.
///
/// A `String` or `&str` payload becomes an error whose `Display` output is
/// that text. Any other payload type has no message to extract, so the error
/// reads `"panic"` instead.
#[cold]
fn panic_err(payload: Box<dyn std::any::Any + Send>) -> Box<dyn std::error::Error> {
    let message = match payload.downcast::<String>() {
        // Owned message: take it out of the box without copying.
        Ok(owned) => *owned,
        // Not a `String`: check for a string slice, otherwise use the fallback.
        Err(other) => match other.downcast_ref::<&str>() {
            Some(&literal) => literal.to_owned(),
            None => "panic".to_owned(),
        },
    };

    Box::from(message)
}

fn catch_panic<T, E>(
callback: impl FnOnce() -> Result<T, E>,
) -> Result<T, Box<dyn std::error::Error>>
where
Box<dyn std::error::Error>: From<E>,
{
Ok(std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback)).map_err(panic_err)??)
}

macro_rules! impl_content_mutation_handlers {
($name:ident: $typ:ty [ $($(#[$meta:meta])* $(@$kind:ident)? $fn_name:ident => $method:ident),+$(,)? ]) => {
$(
Expand Down
59 changes: 30 additions & 29 deletions c-api/src/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,40 +34,43 @@ impl OutputSink for ExternOutputSink {
}
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn lol_html_rewriter_build(
#[allow(clippy::too_many_arguments)]
fn lol_html_rewriter_build_inner(
builder: *mut HtmlRewriterBuilder,
encoding: *const c_char,
encoding_len: size_t,
memory_settings: MemorySettings,
output_sink: unsafe extern "C" fn(*const c_char, size_t, *mut c_void),
output_sink_user_data: *mut c_void,
strict: bool,
) -> *mut HtmlRewriter {
enable_esi_tags: bool,
) -> Result<HtmlRewriter, Box<dyn std::error::Error>> {
let builder = to_ref!(builder);
let handlers = builder.get_safe_handlers();

let maybe_encoding =
encoding_rs::Encoding::for_label_no_replacement(to_bytes!(encoding, encoding_len));
let encoding = unwrap_or_ret_null! { maybe_encoding.ok_or(EncodingError::UnknownEncoding) };
let encoding = maybe_encoding.ok_or(EncodingError::UnknownEncoding)?;
let settings = Settings {
element_content_handlers: handlers.element,
document_content_handlers: handlers.document,
encoding: unwrap_or_ret_null! { encoding.try_into().or(Err(EncodingError::NonAsciiCompatibleEncoding)) },
encoding: encoding
.try_into()
.or(Err(EncodingError::NonAsciiCompatibleEncoding))?,
memory_settings,
strict,
enable_esi_tags: false,
enable_esi_tags,
adjust_charset_on_meta_tag: false,
};

let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data);
let rewriter = lol_html::HtmlRewriter::new(settings, output_sink);

to_ptr_mut(HtmlRewriter(Some(rewriter)))
Ok(HtmlRewriter(Some(rewriter)))
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn unstable_lol_html_rewriter_build_with_esi_tags(
pub unsafe extern "C" fn lol_html_rewriter_build(
builder: *mut HtmlRewriterBuilder,
encoding: *const c_char,
encoding_len: size_t,
Expand All @@ -76,26 +79,24 @@ pub unsafe extern "C" fn unstable_lol_html_rewriter_build_with_esi_tags(
output_sink_user_data: *mut c_void,
strict: bool,
) -> *mut HtmlRewriter {
let builder = to_ref!(builder);
let handlers = builder.get_safe_handlers();

let maybe_encoding =
encoding_rs::Encoding::for_label_no_replacement(to_bytes!(encoding, encoding_len));
let encoding = unwrap_or_ret_null! { maybe_encoding.ok_or(EncodingError::UnknownEncoding) };
let settings = Settings {
element_content_handlers: handlers.element,
document_content_handlers: handlers.document,
encoding: unwrap_or_ret_null! { encoding.try_into().or(Err(EncodingError::NonAsciiCompatibleEncoding)) },
memory_settings,
strict,
enable_esi_tags: true,
adjust_charset_on_meta_tag: false,
};

let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data);
let rewriter = lol_html::HtmlRewriter::new(settings, output_sink);
to_ptr_mut(unwrap_or_ret_null! { catch_panic(move || {
lol_html_rewriter_build_inner(builder, encoding, encoding_len, memory_settings, output_sink, output_sink_user_data, strict, false)
})})
}

to_ptr_mut(HtmlRewriter(Some(rewriter)))
#[unsafe(no_mangle)]
pub unsafe extern "C" fn unstable_lol_html_rewriter_build_with_esi_tags(
builder: *mut HtmlRewriterBuilder,
encoding: *const c_char,
encoding_len: size_t,
memory_settings: MemorySettings,
output_sink: unsafe extern "C" fn(*const c_char, size_t, *mut c_void),
output_sink_user_data: *mut c_void,
strict: bool,
) -> *mut HtmlRewriter {
to_ptr_mut(unwrap_or_ret_null! { catch_panic(move || {
lol_html_rewriter_build_inner(builder, encoding, encoding_len, memory_settings, output_sink, output_sink_user_data, strict, true)
})})
}

#[unsafe(no_mangle)]
Expand All @@ -110,7 +111,7 @@ pub unsafe extern "C" fn lol_html_rewriter_write(
.as_mut()
.expect("cannot call `lol_html_rewriter_write` after calling `end()`");

unwrap_or_ret_err_code! { rewriter.write(chunk) };
unwrap_or_ret_err_code! { catch_panic(move || rewriter.write(chunk)) };

0
}
Expand All @@ -122,7 +123,7 @@ pub unsafe extern "C" fn lol_html_rewriter_end(rewriter: *mut HtmlRewriter) -> c
.take() // Using `take()` allows calling `free()` afterwards (it will be a no-op).
.expect("cannot call `lol_html_rewriter_end` after calling `end()`");

unwrap_or_ret_err_code! { rewriter.end() };
unwrap_or_ret_err_code! { catch_panic(move || rewriter.end()) };

0
}
Expand Down
12 changes: 9 additions & 3 deletions c-api/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,17 @@ pub struct Str {
}

impl Str {
pub const EMPTY: Self = Self {
data: std::ptr::null(),
len: 0,
};

#[must_use]
pub fn new(string: String) -> Self {
pub fn new(string: impl Into<Box<str>>) -> Self {
let string = string.into();
Self {
len: string.len(),
data: Box::into_raw(string.into_boxed_str()) as *const c_char,
data: Box::into_raw(string).cast::<c_char>(),
}
}

Expand All @@ -22,7 +28,7 @@ impl Str {
/// If `string` is `None`, `data` will be set to `NULL`.
#[inline]
#[must_use]
pub fn from_opt(string: Option<String>) -> Self {
pub fn from_opt(string: Option<impl Into<Box<str>>>) -> Self {
match string {
Some(string) => Self::new(string),
None => Self {
Expand Down
2 changes: 1 addition & 1 deletion src/base/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::str;
pub struct HasReplacementsError;

/// A thin wrapper around a byte slice with handy APIs attached
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
#[repr(transparent)]
pub(crate) struct Bytes<'b>(&'b [u8]);

Expand Down
93 changes: 49 additions & 44 deletions src/base/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,56 +6,59 @@ use std::sync::atomic::{AtomicUsize, Ordering};
/// This serves as a map from integer to [`Encoding`], which allows more efficient
/// sets/gets of the [`SharedEncoding`].
static ALL_ENCODINGS: [&Encoding; 40] = [
&encoding_rs::UTF_8_INIT,
&encoding_rs::SHIFT_JIS_INIT,
&encoding_rs::BIG5_INIT,
&encoding_rs::EUC_JP_INIT,
&encoding_rs::EUC_KR_INIT,
&encoding_rs::GB18030_INIT,
&encoding_rs::GBK_INIT,
&encoding_rs::IBM866_INIT,
&encoding_rs::ISO_8859_2_INIT,
&encoding_rs::ISO_8859_3_INIT,
&encoding_rs::ISO_8859_4_INIT,
&encoding_rs::ISO_8859_5_INIT,
&encoding_rs::ISO_8859_6_INIT,
&encoding_rs::ISO_8859_7_INIT,
&encoding_rs::ISO_8859_8_I_INIT,
&encoding_rs::ISO_8859_8_INIT,
&encoding_rs::ISO_8859_10_INIT,
&encoding_rs::ISO_8859_13_INIT,
&encoding_rs::ISO_8859_14_INIT,
&encoding_rs::ISO_8859_15_INIT,
&encoding_rs::ISO_8859_16_INIT,
&encoding_rs::KOI8_R_INIT,
&encoding_rs::KOI8_U_INIT,
&encoding_rs::MACINTOSH_INIT,
&encoding_rs::WINDOWS_1250_INIT,
&encoding_rs::WINDOWS_1251_INIT,
&encoding_rs::WINDOWS_1252_INIT,
&encoding_rs::WINDOWS_1253_INIT,
&encoding_rs::WINDOWS_1254_INIT,
&encoding_rs::WINDOWS_1255_INIT,
&encoding_rs::WINDOWS_1256_INIT,
&encoding_rs::WINDOWS_1257_INIT,
&encoding_rs::WINDOWS_1258_INIT,
&encoding_rs::WINDOWS_874_INIT,
&encoding_rs::X_MAC_CYRILLIC_INIT,
&encoding_rs::X_USER_DEFINED_INIT,
encoding_rs::UTF_8,
encoding_rs::SHIFT_JIS,
encoding_rs::BIG5,
encoding_rs::EUC_JP,
encoding_rs::EUC_KR,
encoding_rs::GB18030,
encoding_rs::GBK,
encoding_rs::IBM866,
encoding_rs::ISO_8859_2,
encoding_rs::ISO_8859_3,
encoding_rs::ISO_8859_4,
encoding_rs::ISO_8859_5,
encoding_rs::ISO_8859_6,
encoding_rs::ISO_8859_7,
encoding_rs::ISO_8859_8_I,
encoding_rs::ISO_8859_8,
encoding_rs::ISO_8859_10,
encoding_rs::ISO_8859_13,
encoding_rs::ISO_8859_14,
encoding_rs::ISO_8859_15,
encoding_rs::ISO_8859_16,
encoding_rs::KOI8_R,
encoding_rs::KOI8_U,
encoding_rs::MACINTOSH,
encoding_rs::WINDOWS_1250,
encoding_rs::WINDOWS_1251,
encoding_rs::WINDOWS_1252,
encoding_rs::WINDOWS_1253,
encoding_rs::WINDOWS_1254,
encoding_rs::WINDOWS_1255,
encoding_rs::WINDOWS_1256,
encoding_rs::WINDOWS_1257,
encoding_rs::WINDOWS_1258,
encoding_rs::WINDOWS_874,
encoding_rs::X_MAC_CYRILLIC,
encoding_rs::X_USER_DEFINED,
// non-ASCII-compatible
&encoding_rs::REPLACEMENT_INIT,
&encoding_rs::UTF_16BE_INIT,
&encoding_rs::UTF_16LE_INIT,
&encoding_rs::ISO_2022_JP_INIT,
encoding_rs::REPLACEMENT,
encoding_rs::UTF_16BE,
encoding_rs::UTF_16LE,
encoding_rs::ISO_2022_JP,
];

#[cfg_attr(debug_assertions, track_caller)]
fn encoding_to_index(encoding: AsciiCompatibleEncoding) -> usize {
let encoding: &'static Encoding = encoding.into();

ALL_ENCODINGS
.iter()
.position(|&e| e == encoding)
.expect("the ALL_ENCODINGS is not complete and needs to be updated")
let index = ALL_ENCODINGS.iter().position(|&e| e == encoding);
debug_assert!(
index.is_some(),
"the ALL_ENCODINGS is not complete and needs to be updated"
);
index.unwrap_or(0)
}

/// A charset encoding that can be shared and modified.
Expand All @@ -71,6 +74,7 @@ pub struct SharedEncoding {

impl SharedEncoding {
#[must_use]
#[cfg_attr(debug_assertions, track_caller)]
pub fn new(encoding: AsciiCompatibleEncoding) -> Self {
Self {
encoding: Arc::new(AtomicUsize::new(encoding_to_index(encoding))),
Expand All @@ -84,6 +88,7 @@ impl SharedEncoding {
ALL_ENCODINGS.get(encoding).unwrap_or(&ALL_ENCODINGS[0])
}

#[cfg_attr(debug_assertions, track_caller)]
pub fn set(&self, encoding: AsciiCompatibleEncoding) {
self.encoding
.store(encoding_to_index(encoding), Ordering::Relaxed);
Expand Down
20 changes: 14 additions & 6 deletions src/memory/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,20 @@ pub(crate) struct Arena {

impl Arena {
pub fn new(limiter: SharedMemoryLimiter, preallocated_size: usize) -> Self {
limiter.preallocate(preallocated_size);

Self {
limiter,
data: Vec::with_capacity(preallocated_size),
}
let mut data = Vec::new();

let preallocated = limiter
.increase_usage(preallocated_size)
.ok()
.and_then(|()| data.try_reserve_exact(preallocated_size).ok())
.is_some();
// HtmlRewriter::new() has no way to report this
debug_assert!(
preallocated,
"Total preallocated memory size should be less than `MemorySettings::max_allowed_memory_usage`."
);

Self { limiter, data }
}

pub fn append(&mut self, slice: &[u8]) -> Result<(), MemoryLimitExceededError> {
Expand Down
Loading