From 77ebcd423e63a96a837bd006e0747c364b77b1f2 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Wed, 30 Jul 2025 20:51:48 -0400 Subject: [PATCH] chore(stringtheory): add Interner trait to generalize interners --- .../src/destinations/prometheus/mod.rs | 5 +++- lib/saluki-context/src/resolver.rs | 5 +++- .../src/workload/collectors/containerd.rs | 2 +- .../collectors/remote_agent/tagger.rs | 5 +++- .../collectors/remote_agent/workloadmeta.rs | 5 +++- .../src/workload/helpers/cgroups.rs | 5 +++- lib/saluki-env/src/workload/on_demand_pid.rs | 4 ++++ lib/stringtheory/src/interning/fixed_size.rs | 24 +++++++------------ lib/stringtheory/src/interning/map.rs | 24 +++++++------------ lib/stringtheory/src/interning/mod.rs | 20 ++++++++++++++++ lib/stringtheory/src/lib.rs | 1 + 11 files changed, 64 insertions(+), 36 deletions(-) diff --git a/lib/saluki-components/src/destinations/prometheus/mod.rs b/lib/saluki-components/src/destinations/prometheus/mod.rs index 4939928cef..397944d19f 100644 --- a/lib/saluki-components/src/destinations/prometheus/mod.rs +++ b/lib/saluki-components/src/destinations/prometheus/mod.rs @@ -25,7 +25,10 @@ use saluki_io::net::{ ListenAddress, }; use serde::Deserialize; -use stringtheory::{interning::FixedSizeInterner, MetaString}; +use stringtheory::{ + interning::{FixedSizeInterner, Interner as _}, + MetaString, +}; use tokio::{select, sync::RwLock}; use tracing::debug; diff --git a/lib/saluki-context/src/resolver.rs b/lib/saluki-context/src/resolver.rs index 948a862ab5..50d26766be 100644 --- a/lib/saluki-context/src/resolver.rs +++ b/lib/saluki-context/src/resolver.rs @@ -7,7 +7,10 @@ use saluki_common::{ }; use saluki_error::{generic_error, GenericError}; use saluki_metrics::static_metrics; -use stringtheory::{interning::GenericMapInterner, CheapMetaString, MetaString}; +use stringtheory::{ + interning::{GenericMapInterner, Interner as _}, + CheapMetaString, MetaString, +}; use tokio::time::sleep; use tracing::debug; diff --git 
a/lib/saluki-env/src/workload/collectors/containerd.rs b/lib/saluki-env/src/workload/collectors/containerd.rs index 871655b740..07c3eb1348 100644 --- a/lib/saluki-env/src/workload/collectors/containerd.rs +++ b/lib/saluki-env/src/workload/collectors/containerd.rs @@ -9,7 +9,7 @@ use saluki_config::GenericConfiguration; use saluki_error::GenericError; use saluki_health::Health; use saluki_metrics::static_metrics; -use stringtheory::interning::GenericMapInterner; +use stringtheory::interning::{GenericMapInterner, Interner as _}; use tokio::{select, sync::mpsc, time::sleep}; use tracing::{debug, error, warn}; diff --git a/lib/saluki-env/src/workload/collectors/remote_agent/tagger.rs b/lib/saluki-env/src/workload/collectors/remote_agent/tagger.rs index aecaf56ac0..9cb2c9c7e0 100644 --- a/lib/saluki-env/src/workload/collectors/remote_agent/tagger.rs +++ b/lib/saluki-env/src/workload/collectors/remote_agent/tagger.rs @@ -9,7 +9,10 @@ use saluki_context::{ }; use saluki_error::GenericError; use saluki_health::Health; -use stringtheory::{interning::GenericMapInterner, MetaString}; +use stringtheory::{ + interning::{GenericMapInterner, Interner as _}, + MetaString, +}; use tokio::{select, sync::mpsc}; use tracing::{debug, trace, warn}; diff --git a/lib/saluki-env/src/workload/collectors/remote_agent/workloadmeta.rs b/lib/saluki-env/src/workload/collectors/remote_agent/workloadmeta.rs index e1b91e77c1..d9e9f6651d 100644 --- a/lib/saluki-env/src/workload/collectors/remote_agent/workloadmeta.rs +++ b/lib/saluki-env/src/workload/collectors/remote_agent/workloadmeta.rs @@ -7,7 +7,10 @@ use saluki_context::origin::ExternalData; use saluki_error::GenericError; use saluki_health::Health; use saluki_metrics::static_metrics; -use stringtheory::{interning::GenericMapInterner, MetaString}; +use stringtheory::{ + interning::{GenericMapInterner, Interner as _}, + MetaString, +}; use tokio::{select, sync::mpsc}; use tracing::{debug, trace}; diff --git 
a/lib/saluki-env/src/workload/helpers/cgroups.rs b/lib/saluki-env/src/workload/helpers/cgroups.rs index 2a4ecf0e29..b5184873a4 100644 --- a/lib/saluki-env/src/workload/helpers/cgroups.rs +++ b/lib/saluki-env/src/workload/helpers/cgroups.rs @@ -12,7 +12,10 @@ use std::{ use regex::Regex; use saluki_config::GenericConfiguration; use saluki_error::{generic_error, ErrorContext as _, GenericError}; -use stringtheory::{interning::GenericMapInterner, MetaString}; +use stringtheory::{ + interning::{GenericMapInterner, Interner as _}, + MetaString, +}; use tracing::{debug, error, trace}; use crate::features::{Feature, FeatureDetector}; diff --git a/lib/saluki-env/src/workload/on_demand_pid.rs b/lib/saluki-env/src/workload/on_demand_pid.rs index 44689dbb27..fa33b57c32 100644 --- a/lib/saluki-env/src/workload/on_demand_pid.rs +++ b/lib/saluki-env/src/workload/on_demand_pid.rs @@ -91,6 +91,8 @@ impl OnDemandPIDResolver { pub fn from_configuration( config: &GenericConfiguration, feature_detector: FeatureDetector, interner: GenericMapInterner, ) -> Result { + use stringtheory::interning::Interner as _; + let telemetry = Telemetry::new(); telemetry .interner_capacity_bytes() @@ -128,6 +130,8 @@ impl OnDemandPIDResolver { #[cfg(target_os = "linux")] async fn drive_telemetry(interner: GenericMapInterner, telemetry: Telemetry) { + use stringtheory::interning::Interner as _; + loop { sleep(Duration::from_secs(1)).await; diff --git a/lib/stringtheory/src/interning/fixed_size.rs b/lib/stringtheory/src/interning/fixed_size.rs index 4e5d3832ff..02b222d438 100644 --- a/lib/stringtheory/src/interning/fixed_size.rs +++ b/lib/stringtheory/src/interning/fixed_size.rs @@ -19,7 +19,7 @@ use loom::sync::{atomic::AtomicUsize, Arc, Mutex}; use super::{ helpers::{aligned, aligned_string, hash_string, layout_for_data, PackedLengthCapacity}, - InternedString, InternerVtable, ReclaimedEntries, ReclaimedEntry, + InternedString, Interner, InternerVtable, ReclaimedEntries, ReclaimedEntry, }; const 
HEADER_LEN: usize = std::mem::size_of::(); @@ -589,7 +589,7 @@ impl InternerState { /// ┗━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛ /// ▲ ▲ ▲ /// └────────── `EntryHeader` ──────────┘ └── aligned for `EntryHeader` -/// (8 byte alignment) via trailing padding  +/// (8 byte alignment) via trailing padding /// ``` /// /// The backing buffer is always aligned properly for `EntryHeader`, so that the first entry can be referenced @@ -643,32 +643,26 @@ impl FixedSizeInterner { state: Arc::new(InternerState::with_capacity(capacity)), } } +} - /// Returns `true` if the interner contains no strings. - pub fn is_empty(&self) -> bool { +impl Interner for FixedSizeInterner { + fn is_empty(&self) -> bool { self.state.is_empty() } - /// Returns the number of strings in the interner. - pub fn len(&self) -> usize { + fn len(&self) -> usize { self.state.len() } - /// Returns the total number of bytes in the interner. - pub fn len_bytes(&self) -> usize { + fn len_bytes(&self) -> usize { self.state.len_bytes() } - /// Returns the total number of bytes the interner can hold. - pub fn capacity_bytes(&self) -> usize { + fn capacity_bytes(&self) -> usize { self.state.capacity_bytes() } - /// Tries to intern the given string. - /// - /// If the intern is at capacity and the given string cannot fit, `None` is returned. Otherwise, `Some` is - /// returned with a reference to the interned string. 
- pub fn try_intern(&self, s: &str) -> Option { + fn try_intern(&self, s: &str) -> Option { self.state.try_intern(s) } } diff --git a/lib/stringtheory/src/interning/map.rs b/lib/stringtheory/src/interning/map.rs index 4c32285b2f..5b504ade78 100644 --- a/lib/stringtheory/src/interning/map.rs +++ b/lib/stringtheory/src/interning/map.rs @@ -20,7 +20,7 @@ use loom::sync::{atomic::AtomicUsize, Arc, Mutex}; use super::{ helpers::{aligned_string, layout_for_data, PackedLengthCapacity}, - InternedString, InternerVtable, ReclaimedEntries, ReclaimedEntry, + InternedString, Interner, InternerVtable, ReclaimedEntries, ReclaimedEntry, }; const HEADER_LEN: usize = std::mem::size_of::(); @@ -532,7 +532,7 @@ unsafe impl Sync for InternerState {} /// ┗━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛ /// ▲ ▲ ▲ /// └────────── `EntryHeader` ──────────┘ └── aligned for `EntryHeader` -/// (8 byte alignment) via trailing padding +/// (8 byte alignment) via trailing padding /// ``` /// /// The backing buffer is always aligned properly for `EntryHeader`, so that the first entry can be referenced @@ -586,32 +586,26 @@ impl GenericMapInterner { state: Arc::new(Mutex::new(InternerState::with_capacity(capacity))), } } +} - /// Returns `true` if the interner contains no strings. - pub fn is_empty(&self) -> bool { +impl Interner for GenericMapInterner { + fn is_empty(&self) -> bool { self.state.lock().unwrap().entries.is_empty() } - /// Returns the number of strings in the interner. - pub fn len(&self) -> usize { + fn len(&self) -> usize { self.state.lock().unwrap().entries.len() } - /// Returns the total number of bytes in the interner. - pub fn len_bytes(&self) -> usize { + fn len_bytes(&self) -> usize { self.state.lock().unwrap().storage.len } - /// Returns the total number of bytes the interner can hold. 
- pub fn capacity_bytes(&self) -> usize { + fn capacity_bytes(&self) -> usize { self.state.lock().unwrap().storage.capacity.get() } - /// Tries to intern the given string. - /// - /// If the intern is at capacity and the given string cannot fit, `None` is returned. Otherwise, `Some` is - /// returned with a reference to the interned string. - pub fn try_intern(&self, s: &str) -> Option<InternedString> { + fn try_intern(&self, s: &str) -> Option<InternedString> { let header = { let mut state = self.state.lock().unwrap(); state.try_intern(s)? diff --git a/lib/stringtheory/src/interning/mod.rs b/lib/stringtheory/src/interning/mod.rs index aeee90df80..83c3a6833b 100644 --- a/lib/stringtheory/src/interning/mod.rs +++ b/lib/stringtheory/src/interning/mod.rs @@ -9,6 +9,26 @@ mod helpers; mod map; pub use self::map::GenericMapInterner; +/// A string interner. +pub trait Interner { + /// Returns `true` if the interner contains no strings. + fn is_empty(&self) -> bool; + + /// Returns the number of strings in the interner. + fn len(&self) -> usize; + + /// Returns the total number of bytes in the interner. + fn len_bytes(&self) -> usize; + + /// Returns the total number of bytes the interner can hold. + fn capacity_bytes(&self) -> usize; + + /// Attempts to intern the given string. + /// + /// Returns `None` if the interner is full or the string cannot fit. + fn try_intern(&self, s: &str) -> Option<InternedString>; +} + pub(crate) struct InternerVtable { /// Name of the interner implementation that this string was interned with. pub interner_name: &'static str, diff --git a/lib/stringtheory/src/lib.rs b/lib/stringtheory/src/lib.rs index 5b3b4cf19a..368813283e 100644 --- a/lib/stringtheory/src/lib.rs +++ b/lib/stringtheory/src/lib.rs @@ -833,6 +833,7 @@ mod tests { use proptest::{prelude::*, proptest}; use super::{interning::GenericMapInterner, InlinedUnion, Inner, MetaString, UnionType}; + use crate::interning::Interner as _; #[test] fn struct_sizes() {