From 05704f7a15f2a2d98ae6c7d38f6c2a7b7fce888c Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:53:24 +0100 Subject: [PATCH 01/10] Fix autodetect process crash from oversized field values by truncating at 256 characters --- docs/CHANGELOG.asciidoc | 4 ++ include/model/CFieldValueTruncator.h | 62 ++++++++++++++++++ lib/api/CAnomalyJob.cc | 13 +++- lib/api/CDataProcessor.cc | 10 ++- lib/api/unittest/CAnomalyJobTest.cc | 50 +++++++++++++++ lib/model/CBucketGatherer.cc | 4 ++ lib/model/CDynamicStringIdRegistry.cc | 5 +- lib/model/CEventRateBucketGatherer.cc | 2 + lib/model/CGathererTools.cc | 3 + .../unittest/CDynamicStringIdRegistryTest.cc | 31 +++++++++ .../unittest/CFieldValueTruncatorTest.cc | 64 +++++++++++++++++++ lib/model/unittest/CMakeLists.txt | 1 + 12 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 include/model/CFieldValueTruncator.h create mode 100644 lib/model/unittest/CFieldValueTruncatorTest.cc diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index fa2d532256..ff1b4c731e 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -30,6 +30,10 @@ == {es} version 9.4.0 +=== Bug Fixes + +* Truncate oversized field values to prevent autodetect process crash. (See {ml-issue}2796[#2796].) + === Enhancements * Better handling of invalid JSON state documents (See {ml-pull}[]#2895].) diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h new file mode 100644 index 0000000000..f56c2b7719 --- /dev/null +++ b/include/model/CFieldValueTruncator.h @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ +#ifndef INCLUDED_ml_model_CFieldValueTruncator_h +#define INCLUDED_ml_model_CFieldValueTruncator_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Truncates field values to prevent memory amplification. +//! +//! DESCRIPTION:\n +//! Field values (by, over, partition, influencer) are term fields +//! in the anomaly detection domain. They are categorical identifiers, +//! not free text. Their length must be bounded to prevent excessive +//! memory consumption that could cause the autodetect process to crash. +//! +//! IMPLEMENTATION DECISIONS:\n +//! The limit of 256 characters aligns with Elasticsearch's +//! ignore_above default for keyword fields. This is sufficient for +//! meaningful anomaly detection field values while preventing memory +//! amplification from extremely long strings (e.g., 77K+ characters) +//! that have been observed to crash the autodetect process. +class MODEL_EXPORT CFieldValueTruncator { +public: + //! Maximum length for analysis term fields (by, over, partition, influencer). + //! Values longer than this are truncated to prevent excessive memory usage. + static constexpr std::size_t MAX_FIELD_VALUE_LENGTH = 256; + + //! In-place truncation of a field value. + //! \return true if truncation occurred, false if value was within limit. + static bool truncate(std::string& value) { + if (value.size() <= MAX_FIELD_VALUE_LENGTH) { + return false; + } + value.resize(MAX_FIELD_VALUE_LENGTH); + return true; + } + + //! Returns a truncated copy of the field value. Original unchanged. + static std::string truncated(const std::string& value) { + if (value.size() <= MAX_FIELD_VALUE_LENGTH) { + return value; + } + return value.substr(0, MAX_FIELD_VALUE_LENGTH); + } +}; +} +} + +#endif // INCLUDED_ml_model_CFieldValueTruncator_h diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index d7321cd2a1..8d30e35564 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -1706,8 +1707,18 @@ void CAnomalyJob::addRecord(const TAnomalyDetectorPtr& detector, model::CAnomalyDetector::TStrCPtrVec fieldValues; const TStrVec& fieldNames = detector->fieldsOfInterest(); fieldValues.reserve(fieldNames.size()); + TStrVec truncatedCopies; for (const auto& fieldName : fieldNames) { - fieldValues.push_back(fieldValue(fieldName, dataRowFields)); + const std::string* value = fieldValue(fieldName, dataRowFields); + if (value != nullptr && value->size() > model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH) { + truncatedCopies.push_back(model::CFieldValueTruncator::truncated(*value)); + fieldValues.push_back(&truncatedCopies.back()); + LOG_WARN(<< "Field '" << fieldName << "' value exceeds " + << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH + << " characters and has been truncated"); + } else { + fieldValues.push_back(value); + } } detector->addRecord(time, fieldValues); diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc index 93db8c4751..7638094f5e 100644 --- a/lib/api/CDataProcessor.cc +++ b/lib/api/CDataProcessor.cc @@ -15,6 +15,8 @@ #include #include +#include + namespace ml { namespace api { @@ -49,7 +51,13 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) { fieldValues.push_back(','); } fieldNames.append(rowIter->first); - fieldValues.append(rowIter->second); + const auto& val = rowIter->second; + if (val.size() > model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH) { + fieldValues.append(val, 0, model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH); + fieldValues.append("..."); + } else { + fieldValues.append(val); + } } result << fieldNames << core_t::LINE_ENDING << fieldValues; diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index d5384327ef..5cb277b966 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -1205,4 +1205,54 @@ BOOST_AUTO_TEST_CASE(testHierarchicalResultsNormalizerShouldIncreaseMemoryUsage) resourceMonitor.forceRefreshAll(); BOOST_TEST_REQUIRE(resourceMonitor.totalMemory() < memoryUsageBeforeUnregister); } + +BOOST_AUTO_TEST_CASE(testOversizedFieldValuesTruncated) { + model::CLimits limits; + api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( + "count", "", "by_field", "", "", {"influencer_field"}); + + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + std::stringstream outputStrm; + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); + + std::string const oversizedValue(77000, 'x'); + CTestAnomalyJob::TStrStrUMap dataRows{ + {"time", "1000"}, {"by_field", oversizedValue}, {"influencer_field", oversizedValue}}; + + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + BOOST_REQUIRE_EQUAL(uint64_t(1), job.numRecordsHandled()); +} + +BOOST_AUTO_TEST_CASE(testNormalFieldValuesNotTruncated) { + model::CLimits limits; + api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( + "count", "", "by_field", "", "", {"influencer_field"}); + + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + std::stringstream outputStrm; + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); + + std::string const normalValue("normal_value"); + CTestAnomalyJob::TStrStrUMap dataRows{ + {"time", "1000"}, {"by_field", normalValue}, {"influencer_field", normalValue}}; + + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + BOOST_REQUIRE_EQUAL(uint64_t(1), job.numRecordsHandled()); +} + +BOOST_AUTO_TEST_CASE(testDebugPrintRecordTruncatesLongValues) { + api::CDataProcessor::TStrStrUMap record; + record["field1"] = std::string(1000, 'x'); + record["field2"] = "short"; + std::string result = api::CDataProcessor::debugPrintRecord(record); + BOOST_TEST_REQUIRE(result.find("...") != std::string::npos); + BOOST_TEST_REQUIRE(result.size() < 1500); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index cdffd8d238..520561e7be 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -116,6 +117,9 @@ bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traver RESTORE_BUILT_IN(PERSON_UID_TAG, person) RESTORE_BUILT_IN(ATTRIBUTE_UID_TAG, attribute) RESTORE_NO_ERROR(INFLUENCER_TAG, influence = traverser.value()) + if (name == INFLUENCER_TAG) { + CFieldValueTruncator::truncate(influence); + } if (name == COUNT_TAG) { if (core::CStringUtils::stringToType(traverser.value(), count) == false) { LOG_ERROR(<< "Failed to restore COUNT_TAG, got " << traverser.value()); diff --git a/lib/model/CDynamicStringIdRegistry.cc b/lib/model/CDynamicStringIdRegistry.cc index 9974e8e15b..c293f664d4 100644 --- a/lib/model/CDynamicStringIdRegistry.cc +++ b/lib/model/CDynamicStringIdRegistry.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -251,7 +252,9 @@ bool CDynamicStringIdRegistry::acceptRestoreTraverser(core::CStateRestoreTravers do { const std::string& name = traverser.name(); if (name == NAMES_TAG) { - m_Names.emplace_back(traverser.value()); + std::string value = traverser.value(); + CFieldValueTruncator::truncate(value); + m_Names.emplace_back(std::move(value)); } else if (name == FREE_NAMES_TAG) { if (!core::CPersistUtils::restore(FREE_NAMES_TAG, m_FreeUids, traverser)) { return false; diff --git a/lib/model/CEventRateBucketGatherer.cc b/lib/model/CEventRateBucketGatherer.cc index a01ddc9cdd..600719089e 100644 --- a/lib/model/CEventRateBucketGatherer.cc +++ b/lib/model/CEventRateBucketGatherer.cc @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -684,6 +685,7 @@ bool restoreInfluencerUniqueStrings(core::CStateRestoreTraverser& traverser, const std::string& name = traverser.name(); if (name == DICTIONARY_WORD_TAG) { key = traverser.value(); + CFieldValueTruncator::truncate(key); } else if (name == UNIQUE_WORD_TAG) { CUniqueStringFeatureData::TWord value; if (value.fromDelimited(traverser.value()) == false) { diff --git a/lib/model/CGathererTools.cc b/lib/model/CGathererTools.cc index 378e0ddd2a..2c9e48414c 100644 --- a/lib/model/CGathererTools.cc +++ b/lib/model/CGathererTools.cc @@ -23,6 +23,8 @@ #include #include +#include + #include namespace ml { @@ -89,6 +91,7 @@ struct SInfluencerSumSerializer { const std::string& name = traverser.name(); if (name == SUM_MAP_KEY_TAG) { key = traverser.value(); + CFieldValueTruncator::truncate(key); } else if (name == SUM_MAP_VALUE_TAG) { if (core::CStringUtils::stringToType(traverser.value(), map[key]) == false) { LOG_ERROR(<< "Invalid sum in " << traverser.value()); diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.cc b/lib/model/unittest/CDynamicStringIdRegistryTest.cc index 1d4aa16988..60cea2b1da 100644 --- a/lib/model/unittest/CDynamicStringIdRegistryTest.cc +++ b/lib/model/unittest/CDynamicStringIdRegistryTest.cc @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -109,4 +110,34 @@ BOOST_AUTO_TEST_CASE(testPersist) { BOOST_REQUIRE_EQUAL(restoredJson.str(), origJson.str()); } +BOOST_AUTO_TEST_CASE(testRestoreTruncatesOversizedNames) { + CResourceMonitor resourceMonitor; + CDynamicStringIdRegistry registry("person", counter_t::E_TSADNumberNewPeople, + counter_t::E_TSADNumberNewPeopleNotAllowed, + counter_t::E_TSADNumberNewPeopleRecycled); + + bool addedPerson = false; + std::string shortName("foo"); + std::string oversizedName(77000, 'x'); + registry.addName(shortName, 0, resourceMonitor, addedPerson); + registry.addName(oversizedName, 0, resourceMonitor, addedPerson); + + std::ostringstream origJson; + core::CJsonStatePersistInserter::persist( + origJson, std::bind_front(&CDynamicStringIdRegistry::acceptPersistInserter, ®istry)); + + std::istringstream is("{\"topLevel\" : " + origJson.str() + "}"); + core::CJsonStateRestoreTraverser traverser(is); + CDynamicStringIdRegistry restoredRegistry("person", counter_t::E_TSADNumberNewPeople, + counter_t::E_TSADNumberNewPeopleNotAllowed, + counter_t::E_TSADNumberNewPeopleRecycled); + traverser.traverseSubLevel(std::bind_front( + &CDynamicStringIdRegistry::acceptRestoreTraverser, &restoredRegistry)); + + BOOST_REQUIRE_EQUAL(2, restoredRegistry.numberNames()); + BOOST_REQUIRE_EQUAL(shortName, restoredRegistry.name(0, "")); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, + restoredRegistry.name(1, "").size()); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CFieldValueTruncatorTest.cc b/lib/model/unittest/CFieldValueTruncatorTest.cc new file mode 100644 index 0000000000..fe5033d496 --- /dev/null +++ b/lib/model/unittest/CFieldValueTruncatorTest.cc @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +BOOST_AUTO_TEST_SUITE(CFieldValueTruncatorTest) + +using namespace ml; +using namespace model; + +BOOST_AUTO_TEST_CASE(testShortValueUnchanged) { + std::string value("short"); + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::truncate(value)); + BOOST_REQUIRE_EQUAL("short", value); +} + +BOOST_AUTO_TEST_CASE(testExactLimitUnchanged) { + std::string value(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, 'x'); + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::truncate(value)); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); +} + +BOOST_AUTO_TEST_CASE(testOversizedValueTruncated) { + std::string value(1000, 'x'); + BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::truncate(value)); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); +} + +BOOST_AUTO_TEST_CASE(testEmptyValueUnchanged) { + std::string value; + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::truncate(value)); + BOOST_REQUIRE_EQUAL(0, value.size()); +} + +BOOST_AUTO_TEST_CASE(testConstOverloadReturnsNewString) { + const std::string longValue(1000, 'x'); + std::string result = CFieldValueTruncator::truncated(longValue); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, result.size()); + BOOST_REQUIRE_EQUAL(1000, longValue.size()); +} + +BOOST_AUTO_TEST_CASE(testConstOverloadShortValueReturnsSame) { + const std::string shortValue("short"); + std::string result = CFieldValueTruncator::truncated(shortValue); + BOOST_REQUIRE_EQUAL("short", result); +} + +BOOST_AUTO_TEST_CASE(testVeryLargeValueTruncated) { + std::string value(77000, 'y'); + BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::truncate(value)); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CMakeLists.txt b/lib/model/unittest/CMakeLists.txt index 8e6d6dcf48..e8f64ac6b5 100644 --- a/lib/model/unittest/CMakeLists.txt +++ b/lib/model/unittest/CMakeLists.txt @@ -22,6 +22,7 @@ set (SRCS CDetectionRuleTest.cc CDetectorEqualizerTest.cc CDynamicStringIdRegistryTest.cc + CFieldValueTruncatorTest.cc CEventRateAnomalyDetectorTest.cc CEventRateDataGathererTest.cc CEventRateModelTest.cc From 4c6e66e6c53e72e2a6cdf4d194fb811c79a76424 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 26 Feb 2026 20:26:49 +0100 Subject: [PATCH 02/10] refactor --- docs/CHANGELOG.asciidoc | 2 +- include/model/CFieldValueTruncator.h | 12 ++++++++++-- lib/api/CAnomalyJob.cc | 2 +- lib/api/CDataProcessor.cc | 6 +++--- lib/api/unittest/CAnomalyJobTest.cc | 5 +++-- lib/model/unittest/CFieldValueTruncatorTest.cc | 11 +++++++++++ 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index ff1b4c731e..4de997996f 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -32,7 +32,7 @@ === Bug Fixes -* Truncate oversized field values to prevent autodetect process crash. (See {ml-issue}2796[#2796].) +* Truncate oversized field values to prevent autodetect process crash. (See {ml-pull}2929[#2929].) === Enhancements diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h index f56c2b7719..6171ed83b7 100644 --- a/include/model/CFieldValueTruncator.h +++ b/include/model/CFieldValueTruncator.h @@ -38,10 +38,17 @@ class MODEL_EXPORT CFieldValueTruncator { //! Values longer than this are truncated to prevent excessive memory usage. static constexpr std::size_t MAX_FIELD_VALUE_LENGTH = 256; + //! Check if a field value needs truncation. + //! This avoids creating copies when checking if truncation is necessary. + //! \return true if the value exceeds MAX_FIELD_VALUE_LENGTH. + static bool needsTruncation(const std::string& value) { + return value.size() > MAX_FIELD_VALUE_LENGTH; + } + //! In-place truncation of a field value. //! \return true if truncation occurred, false if value was within limit. static bool truncate(std::string& value) { - if (value.size() <= MAX_FIELD_VALUE_LENGTH) { + if (!needsTruncation(value)) { return false; } value.resize(MAX_FIELD_VALUE_LENGTH); @@ -49,8 +56,9 @@ class MODEL_EXPORT CFieldValueTruncator { } //! Returns a truncated copy of the field value. Original unchanged. + //! Use needsTruncation() first if you want to avoid copying. static std::string truncated(const std::string& value) { - if (value.size() <= MAX_FIELD_VALUE_LENGTH) { + if (!needsTruncation(value)) { return value; } return value.substr(0, MAX_FIELD_VALUE_LENGTH); diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 8d30e35564..6e2951f597 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -1710,7 +1710,7 @@ void CAnomalyJob::addRecord(const TAnomalyDetectorPtr& detector, TStrVec truncatedCopies; for (const auto& fieldName : fieldNames) { const std::string* value = fieldValue(fieldName, dataRowFields); - if (value != nullptr && value->size() > model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH) { + if (value != nullptr && model::CFieldValueTruncator::needsTruncation(*value)) { truncatedCopies.push_back(model::CFieldValueTruncator::truncated(*value)); fieldValues.push_back(&truncatedCopies.back()); LOG_WARN(<< "Field '" << fieldName << "' value exceeds " diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc index 7638094f5e..61b792417b 100644 --- a/lib/api/CDataProcessor.cc +++ b/lib/api/CDataProcessor.cc @@ -51,9 +51,9 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) { fieldValues.push_back(','); } fieldNames.append(rowIter->first); - const auto& val = rowIter->second; - if (val.size() > model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH) { - fieldValues.append(val, 0, model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH); + const std::string& val = rowIter->second; + if (model::CFieldValueTruncator::needsTruncation(val)) { + fieldValues.append(model::CFieldValueTruncator::truncated(val)); fieldValues.append("..."); } else { fieldValues.append(val); diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index 5cb277b966..681a316641 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -1219,8 +1219,9 @@ BOOST_AUTO_TEST_CASE(testOversizedFieldValuesTruncated) { CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); std::string const oversizedValue(77000, 'x'); - CTestAnomalyJob::TStrStrUMap dataRows{ - {"time", "1000"}, {"by_field", oversizedValue}, {"influencer_field", oversizedValue}}; + CTestAnomalyJob::TStrStrUMap dataRows{{"time", "1000"}, + {"by_field", oversizedValue}, + {"influencer_field", oversizedValue}}; BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); BOOST_REQUIRE_EQUAL(uint64_t(1), job.numRecordsHandled()); diff --git a/lib/model/unittest/CFieldValueTruncatorTest.cc b/lib/model/unittest/CFieldValueTruncatorTest.cc index fe5033d496..447ab7250d 100644 --- a/lib/model/unittest/CFieldValueTruncatorTest.cc +++ b/lib/model/unittest/CFieldValueTruncatorTest.cc @@ -61,4 +61,15 @@ BOOST_AUTO_TEST_CASE(testVeryLargeValueTruncated) { BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); } +BOOST_AUTO_TEST_CASE(testNeedsTruncation) { + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::needsTruncation("short")); + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::needsTruncation("")); + BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::needsTruncation(std::string( + CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, 'x'))); + BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::needsTruncation(std::string( + CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH + 1, 'x'))); + BOOST_REQUIRE_EQUAL( + true, CFieldValueTruncator::needsTruncation(std::string(77000, 'x'))); +} + BOOST_AUTO_TEST_SUITE_END() From 310f228c603930836853d96d6409444c9e312cef Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Fri, 27 Feb 2026 16:23:38 +0100 Subject: [PATCH 03/10] implement hash suffix --- include/api/CAnomalyJob.h | 12 ++ include/model/CFieldValueTruncator.h | 133 ++++++++++++++---- lib/api/CAnomalyJob.cc | 35 +++-- lib/api/CDataProcessor.cc | 2 +- .../unittest/CFieldValueTruncatorTest.cc | 109 +++++++++++++- 5 files changed, 251 insertions(+), 40 deletions(-) diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 279a597936..4167c649a7 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -392,6 +392,18 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { core_t::TTime time, const TStrStrUMap& dataRowFields); + //! Prepare field values with truncation handling. + //! Extracts field values from \p dataRowFields, truncates oversized values, + //! and populates \p fieldValues with pointers to either original or truncated values. + //! \param fieldNames The names of fields to extract + //! \param dataRowFields The data row containing field values + //! \param fieldValues Output vector of pointers to field values + //! \param truncatedCopies Storage for truncated copies (must remain valid while fieldValues is used) + static void prepareTruncatedFieldValues(const TStrVec& fieldNames, + const TStrStrUMap& dataRowFields, + model::CAnomalyDetector::TStrCPtrVec& fieldValues, + TStrVec& truncatedCopies); + //! Parses a control message requesting that model state be persisted. //! Extracts optional arguments to be used for persistence. static bool parsePersistControlMessageArgs(const std::string& controlMessageArgs, diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h index 6171ed83b7..f62c1f6d26 100644 --- a/include/model/CFieldValueTruncator.h +++ b/include/model/CFieldValueTruncator.h @@ -11,57 +11,138 @@ #ifndef INCLUDED_ml_model_CFieldValueTruncator_h #define INCLUDED_ml_model_CFieldValueTruncator_h +#include + #include +#include #include namespace ml { namespace model { -//! \brief Truncates field values to prevent memory amplification. +//! \brief Enforces term field length constraints with collision prevention. +//! +//! In the anomaly detection domain, term fields (by, over, partition, influencer) +//! are categorical identifiers that must satisfy two invariants: +//! 1. **Bounded Length** - Prevent memory amplification and OOM crashes +//! 2. **Unique Identity** - Distinct field values must remain distinguishable +//! +//! Values exceeding MAX_FIELD_VALUE_LENGTH (256 chars) are transformed using +//! collision-safe truncation: +//! - Retain PREFIX_LENGTH (240) characters of original value +//! - Append HASH_SEPARATOR ('$') +//! - Append HASH_HEX_DIGITS (15) character hex hash of complete original value //! -//! DESCRIPTION:\n -//! Field values (by, over, partition, influencer) are term fields -//! in the anomaly detection domain. They are categorical identifiers, -//! not free text. Their length must be bounded to prevent excessive -//! memory consumption that could cause the autodetect process to crash. +//! Format: "$" +//! Example: "very_long_field_value_that_exceeds_limit_and_continues_for_thousands_of_chars_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx$a1b2c3d4e5f6789" //! -//! IMPLEMENTATION DECISIONS:\n -//! The limit of 256 characters aligns with Elasticsearch's -//! ignore_above default for keyword fields. This is sufficient for -//! meaningful anomaly detection field values while preventing memory -//! amplification from extremely long strings (e.g., 77K+ characters) -//! that have been observed to crash the autodetect process. +//! The 256-character limit aligns with Elasticsearch's ignore_above default +//! for keyword fields. The hash suffix ensures data integrity while maintaining +//! human readability (first 240 characters visible) and compatibility with +//! prefix-based filtering. Collision probability is ~1 in 10^18 (effectively zero). class MODEL_EXPORT CFieldValueTruncator { public: - //! Maximum length for analysis term fields (by, over, partition, influencer). - //! Values longer than this are truncated to prevent excessive memory usage. + //! Domain constraint: Maximum length for term fields in anomaly detection. + //! Aligned with Elasticsearch's ignore_above default for keyword fields. static constexpr std::size_t MAX_FIELD_VALUE_LENGTH = 256; - //! Check if a field value needs truncation. - //! This avoids creating copies when checking if truncation is necessary. - //! \return true if the value exceeds MAX_FIELD_VALUE_LENGTH. + //! Collision prevention format components + static constexpr char HASH_SEPARATOR = '$'; + static constexpr std::size_t HASH_HEX_DIGITS = 15; // 15 hex chars for uint64_t + static constexpr std::size_t HASH_SUFFIX_LENGTH = + 1 /* separator */ + HASH_HEX_DIGITS; // 16 total + + //! Content prefix length (readable portion after truncation) + static constexpr std::size_t PREFIX_LENGTH = + MAX_FIELD_VALUE_LENGTH - HASH_SUFFIX_LENGTH; // 240 + + // Domain invariants (enforced at compile-time) + static_assert(PREFIX_LENGTH + HASH_SUFFIX_LENGTH == MAX_FIELD_VALUE_LENGTH, + "Term field format invariant: prefix + suffix = total length"); + static_assert(PREFIX_LENGTH >= 200, + "Readable prefix must be substantial for human comprehension"); + static_assert(HASH_HEX_DIGITS * 4 <= 64, + "Hash hex digits must fit in 64-bit hash output"); + + //! Check if a term field value exceeds the domain constraint. + //! \return true if the value requires length enforcement static bool needsTruncation(const std::string& value) { return value.size() > MAX_FIELD_VALUE_LENGTH; } - //! In-place truncation of a field value. - //! \return true if truncation occurred, false if value was within limit. + //! Enforce term field length constraint in-place. + //! Applies collision-safe truncation for values exceeding the limit. + //! \param[in,out] value Field value to constrain + //! \return true if truncation was applied, false if already within limit static bool truncate(std::string& value) { - if (!needsTruncation(value)) { + if (needsTruncation(value) == false) { return false; } - value.resize(MAX_FIELD_VALUE_LENGTH); + + std::string originalValue = std::move(value); + value.assign(originalValue, 0, PREFIX_LENGTH); + appendCollisionPreventionSuffix(originalValue, value); + return true; } - //! Returns a truncated copy of the field value. Original unchanged. - //! Use needsTruncation() first if you want to avoid copying. + //! Enforce term field length constraint, returning constrained copy. + //! Original value unchanged. For performance, call needsTruncation() first + //! to avoid copying when constraint is already satisfied. + //! \param value Original field value + //! \return Copy with length constraint enforced static std::string truncated(const std::string& value) { - if (!needsTruncation(value)) { - return value; + if (needsTruncation(value) == false) { + return value; // RVO applies + } + + std::string result; + result.reserve(MAX_FIELD_VALUE_LENGTH); + result.assign(value, 0, PREFIX_LENGTH); + appendCollisionPreventionSuffix(value, result); + + return result; + } + +private: + //! \brief Hash encoding for collision prevention. + //! + //! Encapsulates the technical details of hash computation and formatting. + //! Separated from domain logic for clarity and testability. + struct HashEncoding { + //! Compute collision-resistant identity hash. + //! Uses safeMurmurHash64 (endian-neutral) for state persistence safety. + static std::uint64_t compute(const std::string& value) { + return core::CHashing::safeMurmurHash64( + value.data(), static_cast(value.size()), + 0); // Fixed seed for determinism } - return value.substr(0, MAX_FIELD_VALUE_LENGTH); + + //! Format 64-bit hash as zero-padded lowercase hex string. + //! \param hash The hash value to format + //! \param[out] buffer Must be at least HASH_HEX_DIGITS + 1 bytes + //! \return Pointer to null-terminated hex string in buffer + static const char* toHex(std::uint64_t hash, char* buffer) { + // %015llx produces 15-char zero-padded lowercase hex + std::snprintf(buffer, HASH_HEX_DIGITS + 1, "%015llx", + static_cast(hash)); + return buffer; + } + }; + + //! Append collision-prevention suffix: separator + hash. + //! \param originalValue Complete untruncated value for hash computation + //! \param[in,out] prefix Truncated prefix to which suffix is appended + static void appendCollisionPreventionSuffix(const std::string& originalValue, + std::string& prefix) { + std::uint64_t identityHash = HashEncoding::compute(originalValue); + + prefix.reserve(MAX_FIELD_VALUE_LENGTH); + prefix.push_back(HASH_SEPARATOR); + + char hashHexBuffer[HASH_HEX_DIGITS + 1]; + prefix.append(HashEncoding::toHex(identityHash, hashHexBuffer)); } }; } diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 6e2951f597..e607bd9a3e 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -1701,25 +1701,42 @@ const std::string* CAnomalyJob::fieldValue(const std::string& fieldName, return !fieldName.empty() && fieldValue.empty() ? nullptr : &fieldValue; } -void CAnomalyJob::addRecord(const TAnomalyDetectorPtr& detector, - core_t::TTime time, - const TStrStrUMap& dataRowFields) { - model::CAnomalyDetector::TStrCPtrVec fieldValues; - const TStrVec& fieldNames = detector->fieldsOfInterest(); +void CAnomalyJob::prepareTruncatedFieldValues( + const TStrVec& fieldNames, + const TStrStrUMap& dataRowFields, + model::CAnomalyDetector::TStrCPtrVec& fieldValues, + TStrVec& truncatedCopies) { + fieldValues.reserve(fieldNames.size()); - TStrVec truncatedCopies; + truncatedCopies.reserve(fieldNames.size()); + for (const auto& fieldName : fieldNames) { const std::string* value = fieldValue(fieldName, dataRowFields); if (value != nullptr && model::CFieldValueTruncator::needsTruncation(*value)) { truncatedCopies.push_back(model::CFieldValueTruncator::truncated(*value)); fieldValues.push_back(&truncatedCopies.back()); - LOG_WARN(<< "Field '" << fieldName << "' value exceeds " - << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH - << " characters and has been truncated"); + + std::string escapedFieldName = fieldName; + core::CStringUtils::escape('\\', "\n\r\t", escapedFieldName); + LOG_WARN(<< "Field '" << escapedFieldName + << "' value (length=" << value->size() + << ", prefix='" << value->substr(0, std::min(50, value->size())) + << "...') exceeds " << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH + << " characters and has been truncated with collision-safe hash suffix"); } else { fieldValues.push_back(value); } } +} + +void CAnomalyJob::addRecord(const TAnomalyDetectorPtr& detector, + core_t::TTime time, + const TStrStrUMap& dataRowFields) { + model::CAnomalyDetector::TStrCPtrVec fieldValues; + TStrVec truncatedCopies; + const TStrVec& fieldNames = detector->fieldsOfInterest(); + + prepareTruncatedFieldValues(fieldNames, dataRowFields, fieldValues, truncatedCopies); detector->addRecord(time, fieldValues); } diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc index 61b792417b..3ffdb4915e 100644 --- a/lib/api/CDataProcessor.cc +++ b/lib/api/CDataProcessor.cc @@ -53,7 +53,7 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) { fieldNames.append(rowIter->first); const std::string& val = rowIter->second; if (model::CFieldValueTruncator::needsTruncation(val)) { - fieldValues.append(model::CFieldValueTruncator::truncated(val)); + fieldValues.append(val.substr(0, model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH)); fieldValues.append("..."); } else { fieldValues.append(val); diff --git a/lib/model/unittest/CFieldValueTruncatorTest.cc b/lib/model/unittest/CFieldValueTruncatorTest.cc index 447ab7250d..65c3777da3 100644 --- a/lib/model/unittest/CFieldValueTruncatorTest.cc +++ b/lib/model/unittest/CFieldValueTruncatorTest.cc @@ -18,19 +18,23 @@ BOOST_AUTO_TEST_SUITE(CFieldValueTruncatorTest) using namespace ml; using namespace model; +// ============================================================================ +// Constraint Enforcement Behavior +// ============================================================================ + BOOST_AUTO_TEST_CASE(testShortValueUnchanged) { std::string value("short"); BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::truncate(value)); BOOST_REQUIRE_EQUAL("short", value); } -BOOST_AUTO_TEST_CASE(testExactLimitUnchanged) { +BOOST_AUTO_TEST_CASE(testValueAtExactLimitUnchanged) { std::string value(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, 'x'); BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::truncate(value)); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); } -BOOST_AUTO_TEST_CASE(testOversizedValueTruncated) { +BOOST_AUTO_TEST_CASE(testOversizedValueEnforcedTo256Chars) { std::string value(1000, 'x'); BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::truncate(value)); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); @@ -42,7 +46,7 @@ BOOST_AUTO_TEST_CASE(testEmptyValueUnchanged) { BOOST_REQUIRE_EQUAL(0, value.size()); } -BOOST_AUTO_TEST_CASE(testConstOverloadReturnsNewString) { +BOOST_AUTO_TEST_CASE(testConstOverloadPreservesOriginal) { const std::string longValue(1000, 'x'); std::string result = CFieldValueTruncator::truncated(longValue); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, result.size()); @@ -55,7 +59,7 @@ BOOST_AUTO_TEST_CASE(testConstOverloadShortValueReturnsSame) { BOOST_REQUIRE_EQUAL("short", result); } -BOOST_AUTO_TEST_CASE(testVeryLargeValueTruncated) { +BOOST_AUTO_TEST_CASE(testVeryLargeValueFromIssue2796) { std::string value(77000, 'y'); BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::truncate(value)); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); @@ -72,4 +76,101 @@ BOOST_AUTO_TEST_CASE(testNeedsTruncation) { true, CFieldValueTruncator::needsTruncation(std::string(77000, 'x'))); } +// ============================================================================ +// Hash Suffix Format Validation +// ============================================================================ + +BOOST_AUTO_TEST_CASE(testTruncatedValueHasCorrectFormat) { + std::string value(1000, 'x'); + std::string result = CFieldValueTruncator::truncated(value); + + // Format: 240 prefix + '$' + 15 hex chars = 256 total + BOOST_REQUIRE_EQUAL(256, result.size()); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, result[240]); + + // Prefix should match original + BOOST_REQUIRE_EQUAL(0, result.compare(0, 240, value, 0, 240)); + + // Hash portion should be lowercase hex digits + for (std::size_t i = 241; i < 256; ++i) { + BOOST_REQUIRE(std::isxdigit(result[i])); + BOOST_REQUIRE((result[i] >= '0' && result[i] <= '9') || + (result[i] >= 'a' && result[i] <= 'f')); + } +} + +BOOST_AUTO_TEST_CASE(testInPlaceTruncationPreservesFormat) { + std::string value(1000, 'z'); + bool wasTruncated = CFieldValueTruncator::truncate(value); + + BOOST_REQUIRE_EQUAL(true, wasTruncated); + BOOST_REQUIRE_EQUAL(256, value.size()); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, value[240]); + + // Verify hash portion is valid hex + for (std::size_t i = 241; i < 256; ++i) { + BOOST_REQUIRE(std::isxdigit(value[i])); + } +} + +// ============================================================================ +// Collision Prevention (Data Integrity) +// ============================================================================ + +BOOST_AUTO_TEST_CASE(testDistinctValuesProduceDistinctResults) { + std::string prefix(240, 'x'); + std::string value1 = prefix + std::string(1000, 'A'); + std::string value2 = prefix + std::string(1000, 'B'); + + std::string truncated1 = CFieldValueTruncator::truncated(value1); + std::string truncated2 = CFieldValueTruncator::truncated(value2); + + // Same prefix + BOOST_REQUIRE_EQUAL(truncated1.substr(0, 241), truncated2.substr(0, 241)); + + // But different hash suffixes prevent collision + BOOST_REQUIRE_NE(truncated1.substr(241), truncated2.substr(241)); + BOOST_REQUIRE_NE(truncated1, truncated2); +} + +BOOST_AUTO_TEST_CASE(testCollisionsPreventedByHashSuffix) { + // Two values differing only after position 256 (original collision case) + std::string value1(300, 'x'); + value1.replace(280, 20, "AAAAAAAAAAAAAAAAAAAA"); + + std::string value2(300, 'x'); + value2.replace(280, 20, "BBBBBBBBBBBBBBBBBBBB"); + + std::string truncated1 = CFieldValueTruncator::truncated(value1); + std::string truncated2 = CFieldValueTruncator::truncated(value2); + + // Must be distinct despite identical first 240 chars + BOOST_REQUIRE_NE(truncated1, truncated2); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, truncated1.size()); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, truncated2.size()); +} + +BOOST_AUTO_TEST_CASE(testDeterministicHashing) { + std::string value(1000, 'y'); + std::string result1 = CFieldValueTruncator::truncated(value); + std::string result2 = CFieldValueTruncator::truncated(value); + + BOOST_REQUIRE_EQUAL(result1, result2); +} + +BOOST_AUTO_TEST_CASE(testVeryLongValueWithDistinctEnding) { + // Simulate the 77K influencer case from issue #2796 + std::string value1(77000, 'x'); + value1.replace(76990, 10, "VARIANT_A"); + + std::string value2(77000, 'x'); + value2.replace(76990, 10, "VARIANT_B"); + + std::string truncated1 = CFieldValueTruncator::truncated(value1); + std::string truncated2 = CFieldValueTruncator::truncated(value2); + + // Must be distinct despite identical first 240 chars + BOOST_REQUIRE_NE(truncated1, truncated2); +} + BOOST_AUTO_TEST_SUITE_END() From 50379b457975e225ca82bed2af7699e9c4d37911 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:24:04 +0100 Subject: [PATCH 04/10] Change hash suffix to 16 characters --- include/api/CAnomalyJob.h | 6 ++-- include/model/CFieldValueTruncator.h | 34 +++++++++---------- lib/api/CAnomalyJob.cc | 13 ++++--- .../unittest/CFieldValueTruncatorTest.cc | 26 +++++++------- 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 4167c649a7..fd1cc4bc88 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -400,9 +400,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! \param fieldValues Output vector of pointers to field values //! \param truncatedCopies Storage for truncated copies (must remain valid while fieldValues is used) static void prepareTruncatedFieldValues(const TStrVec& fieldNames, - const TStrStrUMap& dataRowFields, - model::CAnomalyDetector::TStrCPtrVec& fieldValues, - TStrVec& truncatedCopies); + const TStrStrUMap& dataRowFields, + model::CAnomalyDetector::TStrCPtrVec& fieldValues, + TStrVec& truncatedCopies); //! Parses a control message requesting that model state be persisted. //! Extracts optional arguments to be used for persistence. diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h index f62c1f6d26..b044f1adfc 100644 --- a/include/model/CFieldValueTruncator.h +++ b/include/model/CFieldValueTruncator.h @@ -30,12 +30,12 @@ namespace model { //! //! Values exceeding MAX_FIELD_VALUE_LENGTH (256 chars) are transformed using //! collision-safe truncation: -//! - Retain PREFIX_LENGTH (240) characters of original value +//! - Retain PREFIX_LENGTH (239) characters of original value //! - Append HASH_SEPARATOR ('$') -//! - Append HASH_HEX_DIGITS (15) character hex hash of complete original value +//! - Append HASH_HEX_DIGITS (16) character hex hash of complete original value //! -//! Format: "$" -//! Example: "very_long_field_value_that_exceeds_limit_and_continues_for_thousands_of_chars_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx$a1b2c3d4e5f6789" +//! Format: "$" +//! Example: "very_long_field_value_that_exceeds_limit_and_continues_for_thousands_of_chars_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx$a1b2c3d4e5f67890" //! //! The 256-character limit aligns with Elasticsearch's ignore_above default //! for keyword fields. The hash suffix ensures data integrity while maintaining @@ -49,21 +49,19 @@ class MODEL_EXPORT CFieldValueTruncator { //! Collision prevention format components static constexpr char HASH_SEPARATOR = '$'; - static constexpr std::size_t HASH_HEX_DIGITS = 15; // 15 hex chars for uint64_t - static constexpr std::size_t HASH_SUFFIX_LENGTH = - 1 /* separator */ + HASH_HEX_DIGITS; // 16 total + static constexpr std::size_t HASH_HEX_DIGITS = 16; // 16 hex chars = full 64-bit hash + static constexpr std::size_t HASH_SUFFIX_LENGTH = 1 /* separator */ + HASH_HEX_DIGITS; // 17 total //! Content prefix length (readable portion after truncation) - static constexpr std::size_t PREFIX_LENGTH = - MAX_FIELD_VALUE_LENGTH - HASH_SUFFIX_LENGTH; // 240 + static constexpr std::size_t PREFIX_LENGTH = MAX_FIELD_VALUE_LENGTH - HASH_SUFFIX_LENGTH; // 239 // Domain invariants (enforced at compile-time) static_assert(PREFIX_LENGTH + HASH_SUFFIX_LENGTH == MAX_FIELD_VALUE_LENGTH, "Term field format invariant: prefix + suffix = total length"); static_assert(PREFIX_LENGTH >= 200, "Readable prefix must be substantial for human comprehension"); - static_assert(HASH_HEX_DIGITS * 4 <= 64, - "Hash hex digits must fit in 64-bit hash output"); + static_assert(HASH_HEX_DIGITS * 4 == 64, + "Hash hex digits must represent full 64-bit hash output"); //! Check if a term field value exceeds the domain constraint. //! \return true if the value requires length enforcement @@ -114,9 +112,9 @@ class MODEL_EXPORT CFieldValueTruncator { //! Compute collision-resistant identity hash. //! Uses safeMurmurHash64 (endian-neutral) for state persistence safety. static std::uint64_t compute(const std::string& value) { - return core::CHashing::safeMurmurHash64( - value.data(), static_cast(value.size()), - 0); // Fixed seed for determinism + return core::CHashing::safeMurmurHash64(value.data(), + static_cast(value.size()), + 0); // Fixed seed for determinism } //! Format 64-bit hash as zero-padded lowercase hex string. @@ -124,9 +122,9 @@ class MODEL_EXPORT CFieldValueTruncator { //! \param[out] buffer Must be at least HASH_HEX_DIGITS + 1 bytes //! \return Pointer to null-terminated hex string in buffer static const char* toHex(std::uint64_t hash, char* buffer) { - // %015llx produces 15-char zero-padded lowercase hex - std::snprintf(buffer, HASH_HEX_DIGITS + 1, "%015llx", - static_cast(hash)); + // %016llx produces 16-char zero-padded lowercase hex (full 64 bits) + std::snprintf(buffer, HASH_HEX_DIGITS + 1, "%016llx", + static_cast(hash)); return buffer; } }; @@ -135,7 +133,7 @@ class MODEL_EXPORT CFieldValueTruncator { //! \param originalValue Complete untruncated value for hash computation //! \param[in,out] prefix Truncated prefix to which suffix is appended static void appendCollisionPreventionSuffix(const std::string& originalValue, - std::string& prefix) { + std::string& prefix) { std::uint64_t identityHash = HashEncoding::compute(originalValue); prefix.reserve(MAX_FIELD_VALUE_LENGTH); diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index e607bd9a3e..af3256e037 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -1701,11 +1701,10 @@ const std::string* CAnomalyJob::fieldValue(const std::string& fieldName, return !fieldName.empty() && fieldValue.empty() ? nullptr : &fieldValue; } -void CAnomalyJob::prepareTruncatedFieldValues( - const TStrVec& fieldNames, - const TStrStrUMap& dataRowFields, - model::CAnomalyDetector::TStrCPtrVec& fieldValues, - TStrVec& truncatedCopies) { +void CAnomalyJob::prepareTruncatedFieldValues(const TStrVec& fieldNames, + const TStrStrUMap& dataRowFields, + model::CAnomalyDetector::TStrCPtrVec& fieldValues, + TStrVec& truncatedCopies) { fieldValues.reserve(fieldNames.size()); truncatedCopies.reserve(fieldNames.size()); @@ -1719,8 +1718,8 @@ void CAnomalyJob::prepareTruncatedFieldValues( std::string escapedFieldName = fieldName; core::CStringUtils::escape('\\', "\n\r\t", escapedFieldName); LOG_WARN(<< "Field '" << escapedFieldName - << "' value (length=" << value->size() - << ", prefix='" << value->substr(0, std::min(50, value->size())) + << "' value (length=" << value->size() << ", prefix='" + << value->substr(0, std::min(50, value->size())) << "...') exceeds " << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH << " characters and has been truncated with collision-safe hash suffix"); } else { diff --git a/lib/model/unittest/CFieldValueTruncatorTest.cc b/lib/model/unittest/CFieldValueTruncatorTest.cc index 65c3777da3..6cc61174e2 100644 --- a/lib/model/unittest/CFieldValueTruncatorTest.cc +++ b/lib/model/unittest/CFieldValueTruncatorTest.cc @@ -84,18 +84,18 @@ BOOST_AUTO_TEST_CASE(testTruncatedValueHasCorrectFormat) { std::string value(1000, 'x'); std::string result = CFieldValueTruncator::truncated(value); - // Format: 240 prefix + '$' + 15 hex chars = 256 total + // Format: 239 prefix + '$' + 16 hex chars = 256 total BOOST_REQUIRE_EQUAL(256, result.size()); - BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, result[240]); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, result[239]); // Prefix should match original - BOOST_REQUIRE_EQUAL(0, result.compare(0, 240, value, 0, 240)); + BOOST_REQUIRE_EQUAL(0, result.compare(0, 239, value, 0, 239)); // Hash portion should be lowercase hex digits - for (std::size_t i = 241; i < 256; ++i) { + for (std::size_t i = 240; i < 256; ++i) { BOOST_REQUIRE(std::isxdigit(result[i])); BOOST_REQUIRE((result[i] >= '0' && result[i] <= '9') || - (result[i] >= 'a' && result[i] <= 'f')); + (result[i] >= 'a' && result[i] <= 'f')); } } @@ -105,10 +105,10 @@ BOOST_AUTO_TEST_CASE(testInPlaceTruncationPreservesFormat) { BOOST_REQUIRE_EQUAL(true, wasTruncated); BOOST_REQUIRE_EQUAL(256, value.size()); - BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, value[240]); + BOOST_REQUIRE_EQUAL(CFieldValueTruncator::HASH_SEPARATOR, value[239]); // Verify hash portion is valid hex - for (std::size_t i = 241; i < 256; ++i) { + for (std::size_t i = 240; i < 256; ++i) { BOOST_REQUIRE(std::isxdigit(value[i])); } } @@ -118,18 +118,18 @@ BOOST_AUTO_TEST_CASE(testInPlaceTruncationPreservesFormat) { // ============================================================================ BOOST_AUTO_TEST_CASE(testDistinctValuesProduceDistinctResults) { - std::string prefix(240, 'x'); + std::string prefix(239, 'x'); std::string value1 = prefix + std::string(1000, 'A'); std::string value2 = prefix + std::string(1000, 'B'); std::string truncated1 = CFieldValueTruncator::truncated(value1); std::string truncated2 = CFieldValueTruncator::truncated(value2); - // Same prefix - BOOST_REQUIRE_EQUAL(truncated1.substr(0, 241), truncated2.substr(0, 241)); + // Same prefix (239 chars + separator) + BOOST_REQUIRE_EQUAL(truncated1.substr(0, 240), truncated2.substr(0, 240)); // But different hash suffixes prevent collision - BOOST_REQUIRE_NE(truncated1.substr(241), truncated2.substr(241)); + BOOST_REQUIRE_NE(truncated1.substr(240), truncated2.substr(240)); BOOST_REQUIRE_NE(truncated1, truncated2); } @@ -144,7 +144,7 @@ BOOST_AUTO_TEST_CASE(testCollisionsPreventedByHashSuffix) { std::string truncated1 = CFieldValueTruncator::truncated(value1); std::string truncated2 = CFieldValueTruncator::truncated(value2); - // Must be distinct despite identical first 240 chars + // Must be distinct despite identical first 239 chars BOOST_REQUIRE_NE(truncated1, truncated2); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, truncated1.size()); BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, truncated2.size()); @@ -169,7 +169,7 @@ BOOST_AUTO_TEST_CASE(testVeryLongValueWithDistinctEnding) { std::string truncated1 = CFieldValueTruncator::truncated(value1); std::string truncated2 = CFieldValueTruncator::truncated(value2); - // Must be distinct despite identical first 240 chars + // Must be distinct despite identical first 239 chars BOOST_REQUIRE_NE(truncated1, truncated2); } From dd955e5a82bb0fb11f9d9d7720f5f7fef177b1f0 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:56:16 +0100 Subject: [PATCH 05/10] Add tests --- docs/CHANGELOG.asciidoc | 2 +- lib/api/CAnomalyJob.cc | 1 + lib/model/CBucketGatherer.cc | 7 +- .../unittest/CEventRateDataGathererTest.cc | 75 ++++++++++++++++++ lib/model/unittest/CMetricDataGathererTest.cc | 77 +++++++++++++++++++ 5 files changed, 157 insertions(+), 5 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 8da062cb0b..5c2485db47 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -32,7 +32,7 @@ === Bug Fixes -* Truncate oversized field values to prevent autodetect process crash. (See {ml-pull}2929[#2929].) +* Truncate oversized field values to prevent autodetect process crash. (See {ml-pull}2929[#2929], {es-pull}143180[#143180], issue: {ml-issue}2796[#2796].) * Report RSS in bytes instead of pages. (See {ml-pull}2917[#2917].) === Enhancements diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index af3256e037..8082f6c08f 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -1707,6 +1707,7 @@ void CAnomalyJob::prepareTruncatedFieldValues(const TStrVec& fieldNames, TStrVec& truncatedCopies) { fieldValues.reserve(fieldNames.size()); + // Reserve ensures no reallocation invalidates pointers stored in fieldValues. truncatedCopies.reserve(fieldNames.size()); for (const auto& fieldName : fieldNames) { diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index 520561e7be..70e9f95114 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -116,10 +116,9 @@ bool restoreInfluencerPersonAttributeCounts(core::CStateRestoreTraverser& traver const std::string& name = traverser.name(); RESTORE_BUILT_IN(PERSON_UID_TAG, person) RESTORE_BUILT_IN(ATTRIBUTE_UID_TAG, attribute) - RESTORE_NO_ERROR(INFLUENCER_TAG, influence = traverser.value()) - if (name == INFLUENCER_TAG) { - CFieldValueTruncator::truncate(influence); - } + RESTORE_NO_ERROR(INFLUENCER_TAG, influence = traverser.value(); + CFieldValueTruncator::truncate(influence)) + if (name == COUNT_TAG) { if (core::CStringUtils::stringToType(traverser.value(), count) == false) { LOG_ERROR(<< "Failed to restore COUNT_TAG, got " << traverser.value()); diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index 859d0e204d..b540b38245 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -1891,4 +1891,79 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CDiurnalTestFixture) { } } +BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerValues, CTestFixture) { + // Verify that oversized influencer field values persisted in old state + // are truncated on restore. This exercises truncation in: + // - CBucketGatherer::restoreInfluencerPersonAttributeCounts (Finding 7) + // - CEventRateBucketGatherer::restoreInfluencerUniqueStrings (Finding 8) + + constexpr core_t::TTime startTime = 0; + constexpr core_t::TTime bucketLength = 600; + SModelParams params(bucketLength); + params.s_LatencyBuckets = 2; + + TFeatureVec features; + features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); + TStrVec influencerFieldNames{"IF1"}; + + CDataGatherer gatherer = + CDataGathererBuilder(model_t::E_EventRate, features, params, key, startTime) + .personFieldName("P") + .valueFieldName("V") + .influenceFieldNames(influencerFieldNames) + .build(); + + BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); + + // Add arrivals with an oversized influencer value (bypasses CAnomalyJob input truncation). + std::string const oversizedInfluencer(500, 'x'); + addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", "val1", oversizedInfluencer); + addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", "val2", oversizedInfluencer); + + // Persist — the JSON will contain the oversized influencer value. + std::ostringstream origJson; + core::CJsonStatePersistInserter::persist( + origJson, [&gatherer](core::CJsonStatePersistInserter& inserter) { + gatherer.acceptPersistInserter(inserter); + }); + + // Sanity check: the persisted JSON contains the full oversized value. + BOOST_TEST_REQUIRE(origJson.str().find(oversizedInfluencer) != std::string::npos); + + // Restore from persisted JSON — truncation should apply. + std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"}; + core::CJsonStateRestoreTraverser traverser(origJsonStrm); + + CBucketGatherer::SBucketGathererInitData bucketGathererInitData{ + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, 0, 0}; + CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, + params, EMPTY_STRING, key, + bucketGathererInitData, traverser); + + // Persist restored gatherer — should NOT contain the oversized value. + std::ostringstream restoredJson; + core::CJsonStatePersistInserter::persist( + restoredJson, [&restoredGatherer](core::CJsonStatePersistInserter& inserter) { + restoredGatherer.acceptPersistInserter(inserter); + }); + + // The full 500-char string must no longer appear (it was truncated to 256). + BOOST_TEST_REQUIRE(restoredJson.str().find(oversizedInfluencer) == std::string::npos); + + // Verify idempotency: restore again and persist — should be identical. + std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; + core::CJsonStateRestoreTraverser traverser2(restoredJsonStrm); + CDataGatherer restoredGatherer2(model_t::E_EventRate, model_t::E_None, + params, EMPTY_STRING, key, + bucketGathererInitData, traverser2); + + std::ostringstream restoredJson2; + core::CJsonStatePersistInserter::persist( + restoredJson2, [&restoredGatherer2](core::CJsonStatePersistInserter& inserter) { + restoredGatherer2.acceptPersistInserter(inserter); + }); + + BOOST_REQUIRE_EQUAL(restoredJson.str(), restoredJson2.str()); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 76feaf0546..0941e95827 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -1843,4 +1843,81 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { } } +BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixture) { + // Verify that oversized influencer keys in CGathererTools::SInfluencerSumSerializer + // are truncated on restore. This exercises truncation in the metric sum gatherer's + // influencer bucket sum restore path. + + constexpr core_t::TTime startTime = 0; + constexpr core_t::TTime bucketLength = 600; + SModelParams params(bucketLength); + params.s_LatencyBuckets = 2; + params.s_SampleCountFactor = 1; + params.s_SampleQueueGrowthFactor = 0.1; + + TFeatureVec features; + features.push_back(model_t::E_IndividualSumByBucketAndPerson); + TStrVec const influencerNames{"i1"}; + + CDataGatherer gatherer = + CDataGathererBuilder(model_t::E_Metric, features, params, KEY, startTime) + .influenceFieldNames(influencerNames) + .sampleCountOverride(2U) + .build(); + + BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor, 1)); + + // Add arrivals with an oversized influencer value (bypasses CAnomalyJob input truncation). + std::string const oversizedInfluencer(500, 'y'); + addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", 1.0, + oversizedInfluencer, ""); + addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", 2.0, + oversizedInfluencer, ""); + + // Persist — the JSON will contain the oversized influencer value. + std::ostringstream origJson; + core::CJsonStatePersistInserter::persist( + origJson, [&gatherer](core::CJsonStatePersistInserter& inserter) { + gatherer.acceptPersistInserter(inserter); + }); + + // Sanity check: the persisted JSON contains the full oversized value. + BOOST_TEST_REQUIRE(origJson.str().find(oversizedInfluencer) != std::string::npos); + + // Restore from persisted JSON — truncation should apply. + std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"}; + core::CJsonStateRestoreTraverser traverser(origJsonStrm); + + CBucketGatherer::SBucketGathererInitData bucketGathererInitData{ + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, 0, 0}; + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, + params, EMPTY_STRING, KEY, + bucketGathererInitData, traverser); + + // Persist restored gatherer — should NOT contain the oversized value. + std::ostringstream restoredJson; + core::CJsonStatePersistInserter::persist( + restoredJson, [&restoredGatherer](core::CJsonStatePersistInserter& inserter) { + restoredGatherer.acceptPersistInserter(inserter); + }); + + // The full 500-char string must no longer appear (it was truncated to 256). + BOOST_TEST_REQUIRE(restoredJson.str().find(oversizedInfluencer) == std::string::npos); + + // Verify idempotency: restore again and persist — should be identical. + std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; + core::CJsonStateRestoreTraverser traverser2(restoredJsonStrm); + CDataGatherer restoredGatherer2(model_t::E_Metric, model_t::E_None, + params, EMPTY_STRING, KEY, + bucketGathererInitData, traverser2); + + std::ostringstream restoredJson2; + core::CJsonStatePersistInserter::persist( + restoredJson2, [&restoredGatherer2](core::CJsonStatePersistInserter& inserter) { + restoredGatherer2.acceptPersistInserter(inserter); + }); + + BOOST_REQUIRE_EQUAL(restoredJson.str(), restoredJson2.str()); +} + BOOST_AUTO_TEST_SUITE_END() From 3f6471f63c0dd665039830ef676b73f5936c3f35 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:08:24 +0100 Subject: [PATCH 06/10] formatting --- .../unittest/CEventRateDataGathererTest.cc | 22 +++++++++---------- lib/model/unittest/CMetricDataGathererTest.cc | 16 +++++--------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index b540b38245..70e6c55bea 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -1906,12 +1906,12 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerValues, CTestFixt features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); TStrVec influencerFieldNames{"IF1"}; - CDataGatherer gatherer = - CDataGathererBuilder(model_t::E_EventRate, features, params, key, startTime) - .personFieldName("P") - .valueFieldName("V") - .influenceFieldNames(influencerFieldNames) - .build(); + CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features, + params, key, startTime) + .personFieldName("P") + .valueFieldName("V") + .influenceFieldNames(influencerFieldNames) + .build(); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); @@ -1936,9 +1936,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerValues, CTestFixt CBucketGatherer::SBucketGathererInitData bucketGathererInitData{ EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, 0, 0}; - CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, - params, EMPTY_STRING, key, - bucketGathererInitData, traverser); + CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, key, bucketGathererInitData, traverser); // Persist restored gatherer — should NOT contain the oversized value. std::ostringstream restoredJson; @@ -1953,9 +1952,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerValues, CTestFixt // Verify idempotency: restore again and persist — should be identical. std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; core::CJsonStateRestoreTraverser traverser2(restoredJsonStrm); - CDataGatherer restoredGatherer2(model_t::E_EventRate, model_t::E_None, - params, EMPTY_STRING, key, - bucketGathererInitData, traverser2); + CDataGatherer restoredGatherer2(model_t::E_EventRate, model_t::E_None, params, + EMPTY_STRING, key, bucketGathererInitData, traverser2); std::ostringstream restoredJson2; core::CJsonStatePersistInserter::persist( diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 0941e95827..b3085727c7 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -1869,10 +1869,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixtur // Add arrivals with an oversized influencer value (bypasses CAnomalyJob input truncation). std::string const oversizedInfluencer(500, 'y'); - addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", 1.0, - oversizedInfluencer, ""); - addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", 2.0, - oversizedInfluencer, ""); + addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", 1.0, oversizedInfluencer, ""); + addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", 2.0, oversizedInfluencer, ""); // Persist — the JSON will contain the oversized influencer value. std::ostringstream origJson; @@ -1890,9 +1888,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixtur CBucketGatherer::SBucketGathererInitData bucketGathererInitData{ EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, 0, 0}; - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, - params, EMPTY_STRING, KEY, - bucketGathererInitData, traverser); + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + KEY, bucketGathererInitData, traverser); // Persist restored gatherer — should NOT contain the oversized value. std::ostringstream restoredJson; @@ -1907,9 +1904,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixtur // Verify idempotency: restore again and persist — should be identical. std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; core::CJsonStateRestoreTraverser traverser2(restoredJsonStrm); - CDataGatherer restoredGatherer2(model_t::E_Metric, model_t::E_None, - params, EMPTY_STRING, KEY, - bucketGathererInitData, traverser2); + CDataGatherer restoredGatherer2(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + KEY, bucketGathererInitData, traverser2); std::ostringstream restoredJson2; core::CJsonStatePersistInserter::persist( From ac73b637bdf677b50342063abfa54b1554166711 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 12:18:16 +0100 Subject: [PATCH 07/10] clean up --- include/model/CFieldValueTruncator.h | 12 ++++------ .../unittest/CDynamicStringIdRegistryTest.cc | 2 +- .../unittest/CFieldValueTruncatorTest.cc | 24 +------------------ 3 files changed, 7 insertions(+), 31 deletions(-) diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h index b044f1adfc..c6df93a803 100644 --- a/include/model/CFieldValueTruncator.h +++ b/include/model/CFieldValueTruncator.h @@ -39,12 +39,11 @@ namespace model { //! //! The 256-character limit aligns with Elasticsearch's ignore_above default //! for keyword fields. The hash suffix ensures data integrity while maintaining -//! human readability (first 240 characters visible) and compatibility with -//! prefix-based filtering. Collision probability is ~1 in 10^18 (effectively zero). +//! human readability (first 239 characters visible) and compatibility with +//! prefix-based filtering. Collision probability is ~1 in 10^19 (effectively zero). class MODEL_EXPORT CFieldValueTruncator { public: - //! Domain constraint: Maximum length for term fields in anomaly detection. - //! Aligned with Elasticsearch's ignore_above default for keyword fields. + //! Maximum length for term fields in anomaly detection. static constexpr std::size_t MAX_FIELD_VALUE_LENGTH = 256; //! Collision prevention format components @@ -86,13 +85,12 @@ class MODEL_EXPORT CFieldValueTruncator { } //! Enforce term field length constraint, returning constrained copy. - //! Original value unchanged. For performance, call needsTruncation() first - //! to avoid copying when constraint is already satisfied. + //! Original value unchanged. //! \param value Original field value //! \return Copy with length constraint enforced static std::string truncated(const std::string& value) { if (needsTruncation(value) == false) { - return value; // RVO applies + return value; } std::string result; diff --git a/lib/model/unittest/CDynamicStringIdRegistryTest.cc b/lib/model/unittest/CDynamicStringIdRegistryTest.cc index 60cea2b1da..b060340dde 100644 --- a/lib/model/unittest/CDynamicStringIdRegistryTest.cc +++ b/lib/model/unittest/CDynamicStringIdRegistryTest.cc @@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(testRestoreTruncatesOversizedNames) { bool addedPerson = false; std::string shortName("foo"); - std::string oversizedName(77000, 'x'); + std::string oversizedName(1000, 'x'); registry.addName(shortName, 0, resourceMonitor, addedPerson); registry.addName(oversizedName, 0, resourceMonitor, addedPerson); diff --git a/lib/model/unittest/CFieldValueTruncatorTest.cc b/lib/model/unittest/CFieldValueTruncatorTest.cc index 6cc61174e2..b17a33b7ce 100644 --- a/lib/model/unittest/CFieldValueTruncatorTest.cc +++ b/lib/model/unittest/CFieldValueTruncatorTest.cc @@ -59,12 +59,6 @@ BOOST_AUTO_TEST_CASE(testConstOverloadShortValueReturnsSame) { BOOST_REQUIRE_EQUAL("short", result); } -BOOST_AUTO_TEST_CASE(testVeryLargeValueFromIssue2796) { - std::string value(77000, 'y'); - BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::truncate(value)); - BOOST_REQUIRE_EQUAL(CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, value.size()); -} - BOOST_AUTO_TEST_CASE(testNeedsTruncation) { BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::needsTruncation("short")); BOOST_REQUIRE_EQUAL(false, CFieldValueTruncator::needsTruncation("")); @@ -72,8 +66,7 @@ BOOST_AUTO_TEST_CASE(testNeedsTruncation) { CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH, 'x'))); BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::needsTruncation(std::string( CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH + 1, 'x'))); - BOOST_REQUIRE_EQUAL( - true, CFieldValueTruncator::needsTruncation(std::string(77000, 'x'))); + BOOST_REQUIRE_EQUAL(true, CFieldValueTruncator::needsTruncation(std::string(1000, 'x'))); } // ============================================================================ @@ -158,19 +151,4 @@ BOOST_AUTO_TEST_CASE(testDeterministicHashing) { BOOST_REQUIRE_EQUAL(result1, result2); } -BOOST_AUTO_TEST_CASE(testVeryLongValueWithDistinctEnding) { - // Simulate the 77K influencer case from issue #2796 - std::string value1(77000, 'x'); - value1.replace(76990, 10, "VARIANT_A"); - - std::string value2(77000, 'x'); - value2.replace(76990, 10, "VARIANT_B"); - - std::string truncated1 = CFieldValueTruncator::truncated(value1); - std::string truncated2 = CFieldValueTruncator::truncated(value2); - - // Must be distinct despite identical first 239 chars - BOOST_REQUIRE_NE(truncated1, truncated2); -} - BOOST_AUTO_TEST_SUITE_END() From 222697199aa69e2f220cb75c82549802659187c1 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 12:32:09 +0100 Subject: [PATCH 08/10] fix unit test --- lib/model/unittest/CMetricDataGathererTest.cc | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index b3085727c7..57bdc9838e 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -111,6 +111,24 @@ void addArrival(CDataGatherer& gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } +void addArrival(CDataGatherer& gatherer, + CResourceMonitor& resourceMonitor, + core_t::TTime time, + const std::string& person, + double value, + const std::string& influencer) { + CDataGatherer::TStrCPtrVec fieldValues; + fieldValues.push_back(&person); + fieldValues.push_back(influencer.empty() ? nullptr : &influencer); + std::string const valueAsString(core::CStringUtils::typeToString(value)); + fieldValues.push_back(&valueAsString); + + CEventData eventData; + eventData.time(time); + + gatherer.addArrival(fieldValues, eventData, resourceMonitor); +} + void addArrival(CDataGatherer& gatherer, CResourceMonitor& resourceMonitor, core_t::TTime time, @@ -1869,8 +1887,11 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixtur // Add arrivals with an oversized influencer value (bypasses CAnomalyJob input truncation). std::string const oversizedInfluencer(500, 'y'); - addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", 1.0, oversizedInfluencer, ""); - addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", 2.0, oversizedInfluencer, ""); + addArrival(gatherer, m_ResourceMonitor, startTime + 1, "p", 1.0, oversizedInfluencer); + addArrival(gatherer, m_ResourceMonitor, startTime + 2, "p", 2.0, oversizedInfluencer); + + // Advance past the first bucket so influencer sums are flushed to the persistable queue. + gatherer.timeNow(startTime + bucketLength); // Persist — the JSON will contain the oversized influencer value. std::ostringstream origJson; From 72c3d6b89aaf393cf6402c47ac98af5e92bdea5a Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:37:49 +0100 Subject: [PATCH 09/10] review comments --- include/core/CLoggerThrottler.h | 3 +- include/core/LogMacros.h | 18 +++++++ include/model/CFieldValueTruncator.h | 7 +-- lib/api/CAnomalyJob.cc | 11 +++-- lib/api/CDataProcessor.cc | 3 +- lib/api/unittest/CAnomalyJobTest.cc | 47 +++++++++++++++++-- .../unittest/CEventRateDataGathererTest.cc | 5 ++ lib/model/unittest/CMetricDataGathererTest.cc | 5 ++ 8 files changed, 83 insertions(+), 16 deletions(-) diff --git a/include/core/CLoggerThrottler.h b/include/core/CLoggerThrottler.h index c6a4505128..58b89c4d8b 100644 --- a/include/core/CLoggerThrottler.h +++ b/include/core/CLoggerThrottler.h @@ -30,7 +30,8 @@ namespace core { //! This is thread safe but uses a very simple strategy: all accesses to a single //! hash map are sychronised. We assume that log throttling is only applied to //! messages which normally occur infrequently; for example, this is only currently -//! applied to WARN and ERROR level logging (see LogMacros.h). So there will be +//! applied to WARN, ERROR, and throttled INFO (LOG_INFO_THROTTLED) logging +//! (see LogMacros.h). So there will be //! little contention. Furthermore, the overhead of locking and unlocking the mutex //! should be neglible compared to the work done if the log line were actually //! emitted. So this should actually give a significant performance improvement diff --git a/include/core/LogMacros.h b/include/core/LogMacros.h index 0c84a88d21..abb96afa21 100644 --- a/include/core/LogMacros.h +++ b/include/core/LogMacros.h @@ -83,6 +83,24 @@ BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), ml::core::CLogger::E_Info) \ LOG_LOCATION_INFO \ message +#ifdef LOG_INFO_THROTTLED +#undef LOG_INFO_THROTTLED +#endif +#define LOG_INFO_THROTTLED(message) \ + do { \ + std::size_t countOfInfoMessages; \ + bool skipInfoMessage; \ + std::tie(countOfInfoMessages, skipInfoMessage) = \ + ml::core::CLogger::instance().throttler().skip(__FILE__, __LINE__); \ + if (skipInfoMessage == false) { \ + BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), \ + ml::core::CLogger::E_Info) \ + LOG_LOCATION_INFO \ + message << (countOfInfoMessages > 1 \ + ? " | repeated [" + std::to_string(countOfInfoMessages) + "]" \ + : ""); \ + } \ + } while (0) #ifdef LOG_WARN #undef LOG_WARN #endif diff --git a/include/model/CFieldValueTruncator.h b/include/model/CFieldValueTruncator.h index c6df93a803..6c0b94aa60 100644 --- a/include/model/CFieldValueTruncator.h +++ b/include/model/CFieldValueTruncator.h @@ -35,12 +35,12 @@ namespace model { //! - Append HASH_HEX_DIGITS (16) character hex hash of complete original value //! //! Format: "$" -//! Example: "very_long_field_value_that_exceeds_limit_and_continues_for_thousands_of_chars_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx$a1b2c3d4e5f67890" +//! Example: "very_long_field_value_that_exceeds_limit_(...)$a1b2c3d4e5f67890" //! //! The 256-character limit aligns with Elasticsearch's ignore_above default //! for keyword fields. The hash suffix ensures data integrity while maintaining //! human readability (first 239 characters visible) and compatibility with -//! prefix-based filtering. Collision probability is ~1 in 10^19 (effectively zero). +//! prefix-based filtering. class MODEL_EXPORT CFieldValueTruncator { public: //! Maximum length for term fields in anomaly detection. @@ -59,8 +59,6 @@ class MODEL_EXPORT CFieldValueTruncator { "Term field format invariant: prefix + suffix = total length"); static_assert(PREFIX_LENGTH >= 200, "Readable prefix must be substantial for human comprehension"); - static_assert(HASH_HEX_DIGITS * 4 == 64, - "Hash hex digits must represent full 64-bit hash output"); //! Check if a term field value exceeds the domain constraint. //! \return true if the value requires length enforcement @@ -85,7 +83,6 @@ class MODEL_EXPORT CFieldValueTruncator { } //! Enforce term field length constraint, returning constrained copy. - //! Original value unchanged. //! \param value Original field value //! \return Copy with length constraint enforced static std::string truncated(const std::string& value) { diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 8082f6c08f..374becaf37 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -1718,11 +1718,12 @@ void CAnomalyJob::prepareTruncatedFieldValues(const TStrVec& fieldNames, std::string escapedFieldName = fieldName; core::CStringUtils::escape('\\', "\n\r\t", escapedFieldName); - LOG_WARN(<< "Field '" << escapedFieldName - << "' value (length=" << value->size() << ", prefix='" - << value->substr(0, std::min(50, value->size())) - << "...') exceeds " << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH - << " characters and has been truncated with collision-safe hash suffix"); + LOG_INFO_THROTTLED( + << "Field '" << escapedFieldName + << "' value (length=" << value->size() << ", prefix='" + << value->substr(0, std::min(50, value->size())) + << "...') exceeds " << model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH + << " characters and has been truncated with collision-safe hash suffix"); } else { fieldValues.push_back(value); } diff --git a/lib/api/CDataProcessor.cc b/lib/api/CDataProcessor.cc index 3ffdb4915e..cb796e19a1 100644 --- a/lib/api/CDataProcessor.cc +++ b/lib/api/CDataProcessor.cc @@ -53,8 +53,7 @@ std::string CDataProcessor::debugPrintRecord(const TStrStrUMap& dataRowFields) { fieldNames.append(rowIter->first); const std::string& val = rowIter->second; if (model::CFieldValueTruncator::needsTruncation(val)) { - fieldValues.append(val.substr(0, model::CFieldValueTruncator::MAX_FIELD_VALUE_LENGTH)); - fieldValues.append("..."); + fieldValues.append(model::CFieldValueTruncator::truncated(val)); } else { fieldValues.append(val); } diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index 681a316641..3e9a9da8f3 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -1207,6 +1208,10 @@ BOOST_AUTO_TEST_CASE(testHierarchicalResultsNormalizerShouldIncreaseMemoryUsage) } BOOST_AUTO_TEST_CASE(testOversizedFieldValuesTruncated) { + // Verify that addRecord (via prepareTruncatedFieldValues) truncates oversized + // by/influencer values before they enter the model. We assert on persisted + // state because that reflects what the detector stored; if addRecord did + // not truncate, the full value would appear here. model::CLimits limits; api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( "count", "", "by_field", "", "", {"influencer_field"}); @@ -1218,13 +1223,32 @@ BOOST_AUTO_TEST_CASE(testOversizedFieldValuesTruncated) { CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); - std::string const oversizedValue(77000, 'x'); + std::string const oversizedValue(1000, 'x'); CTestAnomalyJob::TStrStrUMap dataRows{{"time", "1000"}, {"by_field", oversizedValue}, {"influencer_field", oversizedValue}}; BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); BOOST_REQUIRE_EQUAL(uint64_t(1), job.numRecordsHandled()); + + // Advance past bucket boundary so results are output and state can be persisted. + CTestAnomalyJob::TStrStrUMap advanceRows{{"time", "5000"}, + {"by_field", oversizedValue}, + {"influencer_field", oversizedValue}}; + BOOST_TEST_REQUIRE(job.handleRecord(advanceRows)); + BOOST_REQUIRE_EQUAL(uint64_t(2), job.numRecordsHandled()); + + std::ostringstream* strm{nullptr}; + api::CSingleStreamDataAdder::TOStreamP ptr{strm = new std::ostringstream()}; + api::CSingleStreamDataAdder persister{ptr}; + BOOST_TEST_REQUIRE(job.persistStateInForeground(persister, "")); + std::string const persistedState{strm->str()}; + + // Full oversized value must not be in state (addRecord truncated before store). + BOOST_TEST_REQUIRE(persistedState.find(oversizedValue) == std::string::npos); + // Persisted state must contain the truncated form produced by input truncation. + std::string const expectedTruncated = model::CFieldValueTruncator::truncated(oversizedValue); + BOOST_TEST_REQUIRE(persistedState.find(expectedTruncated) != std::string::npos); } BOOST_AUTO_TEST_CASE(testNormalFieldValuesNotTruncated) { @@ -1245,6 +1269,20 @@ BOOST_AUTO_TEST_CASE(testNormalFieldValuesNotTruncated) { BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); BOOST_REQUIRE_EQUAL(uint64_t(1), job.numRecordsHandled()); + + // Advance past bucket boundary so results are output and state can be persisted. + CTestAnomalyJob::TStrStrUMap advanceRows{ + {"time", "5000"}, {"by_field", normalValue}, {"influencer_field", normalValue}}; + BOOST_TEST_REQUIRE(job.handleRecord(advanceRows)); + BOOST_REQUIRE_EQUAL(uint64_t(2), job.numRecordsHandled()); + + std::ostringstream* strm{nullptr}; + api::CSingleStreamDataAdder::TOStreamP ptr{strm = new std::ostringstream()}; + api::CSingleStreamDataAdder persister{ptr}; + BOOST_TEST_REQUIRE(job.persistStateInForeground(persister, "")); + std::string const persistedState{strm->str()}; + + BOOST_TEST_REQUIRE(persistedState.find(normalValue) != std::string::npos); } BOOST_AUTO_TEST_CASE(testDebugPrintRecordTruncatesLongValues) { @@ -1252,8 +1290,11 @@ BOOST_AUTO_TEST_CASE(testDebugPrintRecordTruncatesLongValues) { record["field1"] = std::string(1000, 'x'); record["field2"] = "short"; std::string result = api::CDataProcessor::debugPrintRecord(record); - BOOST_TEST_REQUIRE(result.find("...") != std::string::npos); - BOOST_TEST_REQUIRE(result.size() < 1500); + // truncated() produces prefix + '$' + 16 hex chars; full 1000-char value not present + BOOST_TEST_REQUIRE(result.find(std::string(1000, 'x')) == std::string::npos); + BOOST_TEST_REQUIRE(result.find(model::CFieldValueTruncator::HASH_SEPARATOR) != + std::string::npos); + BOOST_TEST_REQUIRE(result.size() < 500); } BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index 70e6c55bea..2a231a6cb9 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1948,6 +1949,10 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerValues, CTestFixt // The full 500-char string must no longer appear (it was truncated to 256). BOOST_TEST_REQUIRE(restoredJson.str().find(oversizedInfluencer) == std::string::npos); + // Restore-path truncation must produce the same format as CFieldValueTruncator::truncated. + std::string const expectedTruncated = + model::CFieldValueTruncator::truncated(oversizedInfluencer); + BOOST_TEST_REQUIRE(restoredJson.str().find(expectedTruncated) != std::string::npos); // Verify idempotency: restore again and persist — should be identical. std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 57bdc9838e..0413897891 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1921,6 +1922,10 @@ BOOST_FIXTURE_TEST_CASE(testRestoreTruncatesOversizedInfluencerSums, CTestFixtur // The full 500-char string must no longer appear (it was truncated to 256). BOOST_TEST_REQUIRE(restoredJson.str().find(oversizedInfluencer) == std::string::npos); + // Restore-path truncation must produce the same format as CFieldValueTruncator::truncated. + std::string const expectedTruncated = + model::CFieldValueTruncator::truncated(oversizedInfluencer); + BOOST_TEST_REQUIRE(restoredJson.str().find(expectedTruncated) != std::string::npos); // Verify idempotency: restore again and persist — should be identical. std::istringstream restoredJsonStrm{"{\"topLevel\" : " + restoredJson.str() + "}"}; From 691e8cffccebd9690999383871533779f2349d48 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:43:46 +0100 Subject: [PATCH 10/10] formatting --- include/core/LogMacros.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/core/LogMacros.h b/include/core/LogMacros.h index abb96afa21..a66a6f5dda 100644 --- a/include/core/LogMacros.h +++ b/include/core/LogMacros.h @@ -86,20 +86,20 @@ #ifdef LOG_INFO_THROTTLED #undef LOG_INFO_THROTTLED #endif -#define LOG_INFO_THROTTLED(message) \ - do { \ - std::size_t countOfInfoMessages; \ - bool skipInfoMessage; \ - std::tie(countOfInfoMessages, skipInfoMessage) = \ - ml::core::CLogger::instance().throttler().skip(__FILE__, __LINE__); \ - if (skipInfoMessage == false) { \ - BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), \ - ml::core::CLogger::E_Info) \ - LOG_LOCATION_INFO \ - message << (countOfInfoMessages > 1 \ - ? " | repeated [" + std::to_string(countOfInfoMessages) + "]" \ - : ""); \ - } \ +#define LOG_INFO_THROTTLED(message) \ + do { \ + std::size_t countOfInfoMessages; \ + bool skipInfoMessage; \ + std::tie(countOfInfoMessages, skipInfoMessage) = \ + ml::core::CLogger::instance().throttler().skip(__FILE__, __LINE__); \ + if (skipInfoMessage == false) { \ + BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), \ + ml::core::CLogger::E_Info) \ + LOG_LOCATION_INFO \ + message << (countOfInfoMessages > 1 \ + ? " | repeated [" + std::to_string(countOfInfoMessages) + "]" \ + : ""); \ + } \ } while (0) #ifdef LOG_WARN #undef LOG_WARN