Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 175 additions & 21 deletions libdd-alloc/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use crate::LinearAllocator;
use crate::{AllocError, Allocator};
use core::alloc::Layout;
use core::cell::UnsafeCell;
use core::cell::{Cell, UnsafeCell};
use core::mem::size_of;
use core::ptr::NonNull;

Expand All @@ -17,11 +17,16 @@ use core::ptr::NonNull;
/// [ChainAllocator] creates a new [LinearAllocator] when the current one
/// doesn't have enough space for the requested allocation, and then links the
/// new [LinearAllocator] to the previous one, creating a chain. This is where
/// its name comes from.
/// its name comes from. Each successful growth doubles the target chunk size

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have we experimented with other factors? e.g. 1.5x would still grow geometrically, but not as fast.

/// up to a cap, so small arenas retain a low initial footprint while larger
/// workloads quickly converge to larger chunks.
pub struct ChainAllocator<A: Allocator + Clone> {
top: UnsafeCell<ChainNodePtr<A>>,
/// The size hint for the linear allocator's chunk.
node_size: usize,
/// The size hint for the next linear allocator chunk.
node_size: Cell<usize>,
/// The maximum size hint used for routine geometric growth. Individual
/// oversized allocations can still request larger chunks.
max_node_size: usize,
allocator: A,
}

Expand Down Expand Up @@ -87,38 +92,89 @@ impl<A: Allocator + Clone> ChainAllocator<A> {
/// is worth it. This is somewhat arbitrarily chosen at the moment.
const MIN_NODE_SIZE: usize = 4 * Self::CHAIN_NODE_OVERHEAD;

/// Creates a new [ChainAllocator]. The `chunk_size_hint` is used as a
/// size hint when creating new chunks of the chain. Note that the
/// Default cap for routine geometric growth. This preserves the historical
/// chunk size used by profiling dictionaries while allowing smaller initial
/// chunks to ramp up quickly.
const DEFAULT_MAX_NODE_SIZE: usize = 1024 * 1024;

const fn normalize_node_size(size: usize) -> usize {
if size < Self::MIN_NODE_SIZE {
Self::MIN_NODE_SIZE
} else {
size
}
}

/// Creates a new [ChainAllocator]. The `chunk_size_hint` is used as the
/// initial size hint for chunks of the chain. Routine growth doubles the
/// chunk size up to at least `Self::DEFAULT_MAX_NODE_SIZE`. Note that the
/// [ChainAllocator] will use some bytes at the beginning of each chunk of
/// the chain. The number of bytes is [Self::CHAIN_NODE_OVERHEAD]. Keep
/// this in mind when sizing your hint if you are trying to be precise,
/// such as making sure a specific object fits.
pub const fn new_in(chunk_size_hint: usize, allocator: A) -> Self {
let initial_node_size = Self::normalize_node_size(chunk_size_hint);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why does one of these get a function and the other is inline?

let max_node_size = if initial_node_size < Self::DEFAULT_MAX_NODE_SIZE {
Self::DEFAULT_MAX_NODE_SIZE
} else {
initial_node_size
};
Self::new_capped_in(initial_node_size, max_node_size, allocator)
}

/// Creates a new [ChainAllocator] whose routine growth starts at
/// `chunk_size_hint` and doubles until reaching `max_chunk_size_hint`.
/// Requests larger than the cap are still honored by allocating an
/// oversized chunk for that request.
pub const fn new_capped_in(
chunk_size_hint: usize,
max_chunk_size_hint: usize,
allocator: A,
) -> Self {
let initial_node_size = Self::normalize_node_size(chunk_size_hint);
let max_node_size = if max_chunk_size_hint < initial_node_size {
initial_node_size
} else {
max_chunk_size_hint
};
Self {
top: UnsafeCell::new(ChainNodePtr::new()),
// max is not a const fn, do it manually.
node_size: if chunk_size_hint < Self::MIN_NODE_SIZE {
Self::MIN_NODE_SIZE
} else {
chunk_size_hint
},
node_size: Cell::new(initial_node_size),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

document why this need to be a cell?

max_node_size,
allocator,
}
}

fn next_geometric_node_size(current: usize, max: usize, align: usize) -> usize {
if current >= max {
return max;
}

let Some(doubled) = current.checked_mul(2) else {
return max;
};
let next = if doubled > max { max } else { doubled };

if Layout::from_size_align(next, align).is_ok() {
next
} else {
current

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this the right fall-back when from_size_align fails?

}
}

#[cold]
#[inline(never)]
fn grow(&self, min_size: usize) -> Result<(), AllocError> {
let top = self.top.get();
let chain_layout = Layout::new::<ChainNode<A>>();

let node_size = min_size.max(self.node_size);
let linear = {
let layout = Layout::from_size_align(node_size, chain_layout.align())
.map_err(|_| AllocError)?
.pad_to_align();
LinearAllocator::new_in(layout, self.allocator.clone())?
};
let node_size = min_size.max(self.node_size.get());
let layout = Layout::from_size_align(node_size, chain_layout.align())
.map_err(|_| AllocError)?
.pad_to_align();
let next_node_size =
Self::next_geometric_node_size(layout.size(), self.max_node_size, chain_layout.align());
let linear = LinearAllocator::new_in(layout, self.allocator.clone())?;

// This shouldn't fail.
let chain_node_addr = linear
Expand Down Expand Up @@ -148,6 +204,7 @@ impl<A: Allocator + Clone> ChainAllocator<A> {
// Additionally, references are always temporary for the top, so this
// write will not violate aliasing rules.
unsafe { self.top.get().write(chain_node_ptr) };
self.node_size.set(next_node_size);

Ok(())
}
Expand Down Expand Up @@ -382,6 +439,101 @@ mod tests {
unsafe { allocator.deallocate(ptr.cast(), layout) };
}

#[test]
fn test_size_hint_below_minimum_uses_minimum_node_size() {
let allocator = ChainAllocator::new_capped_in(0, 0, Global);
let layout = Layout::new::<u8>();
let ptr = allocator.allocate(layout).unwrap();
unsafe { allocator.deallocate(ptr.cast(), layout) };

assert!(allocator.reserved_bytes() >= ChainAllocator::<Global>::MIN_NODE_SIZE);
}

#[test]
fn test_geometric_growth() {
let allocator = ChainAllocator::new_in(4096, Global);
let layout = Layout::new::<u8>();

let _ = allocator.allocate(layout).unwrap();
let first_reserved = allocator.reserved_bytes();
fill_to_capacity(&allocator);

let _ = allocator.allocate(layout).unwrap();
let second_reserved = allocator.reserved_bytes();
let second_chunk = second_reserved - first_reserved;
assert!(
second_chunk >= first_reserved * 2,
"second chunk should grow geometrically: first={first_reserved}, second={second_chunk}"
);
fill_to_capacity(&allocator);

let _ = allocator.allocate(layout).unwrap();
let third_reserved = allocator.reserved_bytes();
let third_chunk = third_reserved - second_reserved;
assert!(
third_chunk >= second_chunk * 2,
"third chunk should grow geometrically: second={second_chunk}, third={third_chunk}"
);
}

#[test]
fn test_geometric_growth_clamps_to_cap() {
let allocator = ChainAllocator::new_capped_in(4096, 10 * 1024, Global);
let layout = Layout::new::<u8>();

let _ = allocator.allocate(layout).unwrap();
let first_reserved = allocator.reserved_bytes();
fill_to_capacity(&allocator);

let _ = allocator.allocate(layout).unwrap();
let second_reserved = allocator.reserved_bytes();
fill_to_capacity(&allocator);

let _ = allocator.allocate(layout).unwrap();
let third_reserved = allocator.reserved_bytes();
let third_chunk = third_reserved - second_reserved;

assert!(second_reserved - first_reserved >= first_reserved * 2);
assert!(third_chunk >= 10 * 1024);
assert!(third_chunk < (second_reserved - first_reserved) * 2);
}

#[test]
fn test_capped_growth_honors_initial_size_as_minimum_cap() {
let allocator = ChainAllocator::new_capped_in(8192, 4096, Global);
let layout = Layout::new::<u8>();

let _ = allocator.allocate(layout).unwrap();
let first_reserved = allocator.reserved_bytes();
fill_to_capacity(&allocator);

let _ = allocator.allocate(layout).unwrap();
let second_reserved = allocator.reserved_bytes();
let second_chunk = second_reserved - first_reserved;

assert!(second_chunk >= first_reserved);
assert!(second_chunk < first_reserved * 2);
}

#[test]
fn test_next_geometric_node_size_edge_cases() {
assert_eq!(
4096,
ChainAllocator::<Global>::next_geometric_node_size(8192, 4096, 1)
);
assert_eq!(
usize::MAX,
ChainAllocator::<Global>::next_geometric_node_size(usize::MAX / 2 + 1, usize::MAX, 1)
);

let invalid_layout_size = isize::MAX as usize + 1;
let current = invalid_layout_size / 2 + 1;
assert_eq!(
current,
ChainAllocator::<Global>::next_geometric_node_size(current, invalid_layout_size, 1)
);
}

#[track_caller]
fn fill_to_capacity<A: Allocator + Clone>(allocator: &ChainAllocator<A>) {
let remaining_capacity = allocator.remaining_capacity();
Expand All @@ -401,8 +553,10 @@ mod tests {

let bool_layout = Layout::new::<bool>();

const GROWTH_ITERATIONS: usize = 16;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why the reduction?


// test that it fills to capacity a few times.
for _ in 0..100 {
for _ in 0..GROWTH_ITERATIONS {
fill_to_capacity(&allocator);

// This check is theoretically redundant because fill_to_capacity
Expand All @@ -426,7 +580,7 @@ mod tests {
let reserved_bytes = allocator.reserved_bytes();
// The allocations can theoretically be over-allocated, so use >= to
// do the comparison.
assert!(reserved_bytes >= page_size * 100);
assert!(reserved_bytes >= page_size * GROWTH_ITERATIONS);

// Everything is filled to capacity except the last iteration.
let used_bytes = allocator.used_bytes();
Expand Down
10 changes: 7 additions & 3 deletions libdd-profiling/src/collections/string_table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,17 @@ impl StringTable {
/// Creates a new string table, which initially holds the empty string and
/// no others.
pub fn new() -> Self {
// Common profiles fit comfortably below the historical chunk size, so
// start small and let larger profiles grow geometrically.
const SIZE_HINT: usize = 512 * 1024;

// Keep in mind 32-bit .NET. There is only 2 GiB of virtual memory
// total available to an application, and we're not the application,
// we're just a piece inside it. Additionally, there may be 2 or more
// string tables in memory at a given time. Talk to .NET profiling
// engineers before making this any bigger.
const SIZE_HINT: usize = 4 * 1024 * 1024;
let bytes = ChainAllocator::new_in(SIZE_HINT, VirtualAllocator {});
// engineers before making this cap any bigger.
const MAX_SIZE_HINT: usize = 4 * 1024 * 1024;
let bytes = ChainAllocator::new_capped_in(SIZE_HINT, MAX_SIZE_HINT, VirtualAllocator {});

let mut strings = HashSet::with_hasher(Hasher::default());
// It varies by implementation, but frequently I've noticed that the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::ops::Deref;
/// Number of shards used by the parallel slice set and (by extension)
/// the string-specific parallel set. Kept as a constant so tests and
/// related code can refer to the same value.
pub const N_SHARDS: usize = 16;
pub const N_SHARDS: usize = 4;

/// The initial capacities for Rust's hash map (and set) currently go
/// like this: 3, 7, 14, 28. We want to avoid some of the smaller sizes so
Expand Down Expand Up @@ -64,9 +64,7 @@ impl<T: Copy + hash::Hash + Eq + 'static> ParallelSliceSet<T> {
pub const fn select_shard(hash: u64) -> usize {
// Use lower bits for shard selection to avoid interfering with
// Swiss tables' internal SIMD comparisons that use upper 7 bits.
// Using 4 bits provides resilience against hash function deficiencies
// and optimal scaling for low thread counts.
(hash & 0b1111) as usize
(hash as usize) & (N_SHARDS - 1)
}

/// Tries to create a new parallel slice set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ impl ParallelStringSet {
Ok(StringRef(thin_slice.into()))
}

/// Selects which shard a hash should go to (0-3 for 4 shards).
/// Selects which shard a hash should go to.
pub fn select_shard(hash: u64) -> usize {
ParallelSliceSet::<u8>::select_shard(hash)
}
Expand Down Expand Up @@ -184,10 +184,9 @@ mod tests {
shard_counts[shard] += 1;
}

// Verify that distribution is not completely degenerate
// (both shards should get at least some strings)
assert!(shard_counts[0] > 0, "Shard 0 got no strings");
assert!(shard_counts[1] > 0, "Shard 1 got no strings");
// Verify that distribution is not completely degenerate.
assert!(shard_counts.iter().any(|&count| count > 0));
assert!(shard_counts.iter().filter(|&&count| count > 0).count() > 1);

// Print distribution for manual inspection
println!("Shard distribution: {:?}", shard_counts);
Expand Down
11 changes: 9 additions & 2 deletions libdd-profiling/src/profiles/collections/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ pub struct Set<T: Hash + Eq + 'static> {
}

impl<T: Eq + Hash + 'static> Set<T> {
pub const SIZE_HINT: usize = 1024 * 1024;
// Keep the per-shard arena small; larger dictionaries grow
// geometrically up to the historical 1 MiB chunk size.
pub const SIZE_HINT: usize = 64 * 1024;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this constant?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be named INITIAL_SIZE_HINT

pub const MAX_SIZE_HINT: usize = 1024 * 1024;

pub fn try_new() -> Result<Self, SetError> {
Self::try_with_capacity(SET_MIN_CAPACITY)
Expand Down Expand Up @@ -146,7 +149,11 @@ impl<T: Hash + Eq + 'static> Drop for Set<T> {

impl<T: Hash + Eq + 'static> Set<T> {
pub(crate) fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
let arena = ChainAllocator::new_in(Self::SIZE_HINT, VirtualAllocator {});
let arena = ChainAllocator::new_capped_in(
Self::SIZE_HINT,
Self::MAX_SIZE_HINT,
VirtualAllocator {},
);
let mut table = HashTable::new();

// SAFETY: new empty table cannot require rehash, callback unreachable.
Expand Down
11 changes: 9 additions & 2 deletions libdd-profiling/src/profiles/collections/slice_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,17 @@ pub struct SliceSet<T: Copy + Hash + Eq + 'static> {
}

impl<T: Copy + Hash + Eq + 'static> SliceSet<T> {
const SIZE_HINT: usize = 1024 * 1024;
// Keep the per-shard arena small; larger dictionaries grow
// geometrically up to the historical 1 MiB chunk size.
const SIZE_HINT: usize = 64 * 1024;
const MAX_SIZE_HINT: usize = 1024 * 1024;

pub fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
let arena = ChainAllocator::new_in(Self::SIZE_HINT, VirtualAllocator {});
let arena = ChainAllocator::new_capped_in(
Self::SIZE_HINT,
Self::MAX_SIZE_HINT,
VirtualAllocator {},
);

let mut slices = HashTable::new();
// SAFETY: we just made the empty hash table, so there's nothing that
Expand Down
Loading
Loading