From c295145a8b6798681cf04563af959e79163f75e4 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 3 Jun 2026 23:30:09 +0200 Subject: [PATCH] extend storage metrics with bytes --- .../lib/docs_rs_storage/src/archive_index.rs | 28 +----- .../docs_rs_storage/src/backends/memory.rs | 7 +- crates/lib/docs_rs_storage/src/backends/s3.rs | 24 +++-- crates/lib/docs_rs_storage/src/metrics.rs | 88 ++++++++++++++++++- 4 files changed, 112 insertions(+), 35 deletions(-) diff --git a/crates/lib/docs_rs_storage/src/archive_index.rs b/crates/lib/docs_rs_storage/src/archive_index.rs index 21fa0ba46..2b3fe6319 100644 --- a/crates/lib/docs_rs_storage/src/archive_index.rs +++ b/crates/lib/docs_rs_storage/src/archive_index.rs @@ -1,6 +1,6 @@ use crate::{ PathNotFoundError, blob::StreamingBlob, config::ArchiveIndexCacheConfig, file::FolderEntry, - types::FileRange, utils::file_list::walk_dir_recursive, + metrics::FILE_SIZE_HISTOGRAM_BUCKETS, types::FileRange, utils::file_list::walk_dir_recursive, }; use anyhow::{Context as _, Result, anyhow, bail}; use async_stream::try_stream; @@ -68,28 +68,6 @@ impl Metrics { fn new(meter_provider: &AnyMeterProvider) -> Self { let meter = meter_provider.meter("storage"); const PREFIX: &str = "docsrs.storage.archive_index_cache"; - const KIB: f64 = 1024.0; - const MIB: f64 = 1024.0 * KIB; - const GIB: f64 = 1024.0 * MIB; - - let entry_size_boundaries = vec![ - 500.0 * KIB, - 1.0 * MIB, - 2.0 * MIB, - 4.0 * MIB, - 8.0 * MIB, - 16.0 * MIB, - 32.0 * MIB, - 64.0 * MIB, - 128.0 * MIB, - 256.0 * MIB, - 512.0 * MIB, - 1.0 * GIB, - 2.0 * GIB, - 4.0 * GIB, - 8.0 * GIB, - 10.0 * GIB, - ]; Self { find_calls: meter @@ -115,12 +93,12 @@ impl Metrics { evicted_entry_size: meter .u64_histogram(format!("{PREFIX}.evicted_entry_size")) .with_unit("By") - .with_boundaries(entry_size_boundaries.clone()) + .with_boundaries(FILE_SIZE_HISTOGRAM_BUCKETS.to_vec()) .build(), downloaded_entry_size: meter .u64_histogram(format!("{PREFIX}.downloaded_entry_size")) .with_unit("By") - .with_boundaries(entry_size_boundaries) + .with_boundaries(FILE_SIZE_HISTOGRAM_BUCKETS.to_vec()) .build(), weighted_size_bytes: meter .u64_gauge(format!("{PREFIX}.weighted_size_bytes")) diff --git a/crates/lib/docs_rs_storage/src/backends/memory.rs b/crates/lib/docs_rs_storage/src/backends/memory.rs index 818c1e4f4..9d93f4afa 100644 --- a/crates/lib/docs_rs_storage/src/backends/memory.rs +++ b/crates/lib/docs_rs_storage/src/backends/memory.rs @@ -3,7 +3,7 @@ use crate::{ backends::StorageBackendMethods, blob::{StreamUpload, StreamUploadSource, StreamingBlob}, errors::PathNotFoundError, - metrics::StorageMetrics, + metrics::{StorageMetrics, UploadType}, types::FileRange, }; use anyhow::{Result, anyhow}; @@ -60,6 +60,7 @@ impl StorageBackendMethods for MemoryBackend { StreamUploadSource::Bytes(content) => content.to_vec(), StreamUploadSource::File(path) => fs::read(&path).await?, }; + let content_len = content.len(); let blob = Blob { path, @@ -70,7 +71,9 @@ impl StorageBackendMethods for MemoryBackend { compression, }; - self.otel_metrics.uploaded_files.add(1, &[]); + self.otel_metrics + .record_upload_metrics(content_len as u64, UploadType::Single); + self.objects.insert(blob.path.clone(), blob); Ok(()) } diff --git a/crates/lib/docs_rs_storage/src/backends/s3.rs b/crates/lib/docs_rs_storage/src/backends/s3.rs index c1cd6ccd6..d1cebc9e0 100644 --- a/crates/lib/docs_rs_storage/src/backends/s3.rs +++ b/crates/lib/docs_rs_storage/src/backends/s3.rs @@ -4,7 +4,7 @@ use crate::{ blob::{StreamUpload, StreamUploadSource, StreamingBlob}, crc32_for_path, errors::PathNotFoundError, - metrics::StorageMetrics, + metrics::{StorageMetrics, UploadType}, types::FileRange, utils::crc32::crc32_for_path_range, }; @@ -200,7 +200,8 @@ impl S3Backend { request.send().await?; - self.otel_metrics.uploaded_files.add(1, &[]); + self.otel_metrics + .record_upload_metrics(content_length, UploadType::Single); Ok(()) } @@ -256,7 +257,8 @@ impl S3Backend { match result { Ok(()) => { - self.otel_metrics.uploaded_files.add(1, &[]); + self.otel_metrics + .record_upload_metrics(content_length, UploadType::MultiPart); Ok(()) } Err(err) => { @@ -436,6 +438,17 @@ impl StorageBackendMethods for S3Backend { None }; + let content_length: usize = res + .content_length + .and_then(|length| length.try_into().ok()) + .unwrap_or(0); + + // NOTE: we record the download, even though we don't know if the caller + // actually consumes the stream. + // For the current usage, that's fine. + self.otel_metrics + .record_download_metrics(content_length as u64); + Ok(StreamingBlob { path: path.into(), mime: res @@ -446,10 +459,7 @@ impl StorageBackendMethods for S3Backend { .unwrap_or(mime::APPLICATION_OCTET_STREAM), date_updated, etag, - content_length: res - .content_length - .and_then(|length| length.try_into().ok()) - .unwrap_or(0), + content_length, content: Box::new(res.body.into_async_read()), compression, }) diff --git a/crates/lib/docs_rs_storage/src/metrics.rs b/crates/lib/docs_rs_storage/src/metrics.rs index 53bcd34f6..efa6d2c5c 100644 --- a/crates/lib/docs_rs_storage/src/metrics.rs +++ b/crates/lib/docs_rs_storage/src/metrics.rs @@ -1,9 +1,59 @@ use docs_rs_opentelemetry::AnyMeterProvider; -use opentelemetry::metrics::Counter; +use opentelemetry::{ + KeyValue, + metrics::{Counter, Histogram}, +}; +use strum::IntoStaticStr; + +const KIB: f64 = 1024.0; +const MIB: f64 = 1024.0 * KIB; +const GIB: f64 = 1024.0 * MIB; + +/// boundaries for histogram metrics where we track +/// file sizes on our S3 bucket. +/// This has to include: +/// * zip archives (between 500 KiB & 10 GiB) +/// * archive indexes (between <100 KiB & 500 MiB) +/// * plain old html / css files (mostly super small) +pub(crate) const FILE_SIZE_HISTOGRAM_BUCKETS: &[f64] = &[ + KIB, + 4.0 * KIB, + 16.0 * KIB, + 64.0 * KIB, + 256.0 * KIB, + 512.0 * KIB, + MIB, + 2.0 * MIB, + 4.0 * MIB, + 8.0 * MIB, + 16.0 * MIB, + 32.0 * MIB, + 64.0 * MIB, + 128.0 * MIB, + 256.0 * MIB, + 512.0 * MIB, + GIB, + 2.0 * GIB, + 4.0 * GIB, + 8.0 * GIB, + 10.0 * GIB, +]; + +#[derive(Copy, Clone, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] +pub(crate) enum UploadType { + Single, + MultiPart, +} #[derive(Debug)] pub(crate) struct StorageMetrics { pub(crate) uploaded_files: Counter, + pub(crate) uploaded_bytes: Counter, + pub(crate) uploaded_entry_size: Histogram, + pub(crate) downloaded_files: Counter, + pub(crate) downloaded_bytes: Counter, + pub(crate) downloaded_entry_size: Histogram, } impl StorageMetrics { @@ -16,6 +66,42 @@ impl StorageMetrics { .u64_counter(format!("{PREFIX}.uploaded_files")) .with_unit("1") .build(), + uploaded_bytes: meter + .u64_counter(format!("{PREFIX}.uploaded_bytes")) + .with_unit("By") + .build(), + uploaded_entry_size: meter + .u64_histogram(format!("{PREFIX}.uploaded_entry_size")) + .with_unit("By") + .with_boundaries(FILE_SIZE_HISTOGRAM_BUCKETS.to_vec()) + .build(), + downloaded_files: meter + .u64_counter(format!("{PREFIX}.downloaded_files")) + .with_unit("1") + .build(), + downloaded_bytes: meter + .u64_counter(format!("{PREFIX}.downloaded_bytes")) + .with_unit("By") + .build(), + downloaded_entry_size: meter + .u64_histogram(format!("{PREFIX}.downloaded_entry_size")) + .with_unit("By") + .with_boundaries(FILE_SIZE_HISTOGRAM_BUCKETS.to_vec()) + .build(), } } + + pub(crate) fn record_download_metrics(&self, content_length: u64) { + self.downloaded_files.add(1, &[]); + self.downloaded_bytes.add(content_length, &[]); + self.downloaded_entry_size.record(content_length, &[]); + } + + pub(crate) fn record_upload_metrics(&self, content_length: u64, via: UploadType) { + let via: &str = via.into(); + let via = [KeyValue::new("via", via)]; + self.uploaded_files.add(1, &via); + self.uploaded_bytes.add(content_length, &via); + self.uploaded_entry_size.record(content_length, &via); + } }