From e6e5fc5934536691b177cdb8a1a38439d369f51b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 23 Mar 2026 09:18:03 +1300 Subject: [PATCH 1/5] Reapply "[ML] Harden pytorch_inference with TorchScript model graph validation (#2999)" (#3006) This reverts commit ceabc9b8efa6c042862a46d260a3e31eb266f0dd. --- .buildkite/pipeline.json.py | 2 + .../run_es_inference_tests_x86_64.yml.sh | 34 + .buildkite/pipelines/run_pytorch_tests.yml.sh | 4 +- .buildkite/pipelines/run_qa_tests.yml.sh | 10 +- .../scripts/steps/run_es_inference_tests.sh | 31 + bin/pytorch_inference/CMakeLists.txt | 2 + bin/pytorch_inference/CModelGraphValidator.cc | 115 +++ bin/pytorch_inference/CModelGraphValidator.h | 91 ++ bin/pytorch_inference/CSupportedOperations.cc | 150 +++ bin/pytorch_inference/CSupportedOperations.h | 68 ++ bin/pytorch_inference/Main.cc | 38 +- .../unittest/CCommandParserTest.cc | 2 +- bin/pytorch_inference/unittest/CMakeLists.txt | 3 + .../unittest/CModelGraphValidatorTest.cc | 483 +++++++++ .../unittest/CResultWriterTest.cc | 4 +- .../unittest/CThreadSettingsTest.cc | 2 +- .../malicious_models/malicious_conditional.pt | Bin 0 -> 2205 bytes .../malicious_models/malicious_file_reader.pt | Bin 0 -> 2141 bytes .../malicious_file_reader_in_submodule.pt | Bin 0 -> 2488 bytes .../malicious_models/malicious_heap_leak.pt | Bin 0 -> 4623 bytes .../malicious_hidden_in_submodule.pt | Bin 0 -> 2517 bytes .../malicious_many_unrecognised.pt | Bin 0 -> 2311 bytes .../malicious_mixed_file_reader.pt | Bin 0 -> 2311 bytes .../malicious_models/malicious_rop_exploit.pt | Bin 0 -> 6109 bytes .../testfiles/reference_model_ops.json | 938 ++++++++++++++++++ cmake/run-validation.cmake | 195 ++++ dev-tools/extract_model_ops/.gitignore | 1 + dev-tools/extract_model_ops/README.md | 166 ++++ .../extract_model_ops/es_it_models/README.md | 41 + .../supersimple_pytorch_model_it.pt | Bin 0 -> 1630 bytes .../es_it_models/tiny_text_embedding.pt | Bin 0 -> 1694 bytes .../es_it_models/tiny_text_expansion.pt | Bin 0 
-> 2078 bytes .../extract_model_ops/extract_model_ops.py | 149 +++ .../extract_model_ops/reference_models.json | 31 + dev-tools/extract_model_ops/requirements.txt | 4 + .../extract_model_ops/torchscript_utils.py | 127 +++ .../extract_model_ops/validate_allowlist.py | 201 ++++ .../extract_model_ops/validation_models.json | 33 + dev-tools/generate_malicious_models.py | 274 +++++ dev-tools/run_es_inference_tests.sh | 31 + dev-tools/run_es_tests.sh | 171 +--- dev-tools/run_es_tests_common.sh | 206 ++++ docs/CHANGELOG.asciidoc | 1 + test/CMakeLists.txt | 22 + 44 files changed, 3449 insertions(+), 181 deletions(-) create mode 100755 .buildkite/pipelines/run_es_inference_tests_x86_64.yml.sh create mode 100755 .buildkite/scripts/steps/run_es_inference_tests.sh create mode 100644 bin/pytorch_inference/CModelGraphValidator.cc create mode 100644 bin/pytorch_inference/CModelGraphValidator.h create mode 100644 bin/pytorch_inference/CSupportedOperations.cc create mode 100644 bin/pytorch_inference/CSupportedOperations.h create mode 100644 bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_conditional.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader_in_submodule.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_heap_leak.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_hidden_in_submodule.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_many_unrecognised.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_mixed_file_reader.pt create mode 100644 bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_rop_exploit.pt create mode 100644 
bin/pytorch_inference/unittest/testfiles/reference_model_ops.json create mode 100644 cmake/run-validation.cmake create mode 100644 dev-tools/extract_model_ops/.gitignore create mode 100644 dev-tools/extract_model_ops/README.md create mode 100644 dev-tools/extract_model_ops/es_it_models/README.md create mode 100644 dev-tools/extract_model_ops/es_it_models/supersimple_pytorch_model_it.pt create mode 100644 dev-tools/extract_model_ops/es_it_models/tiny_text_embedding.pt create mode 100644 dev-tools/extract_model_ops/es_it_models/tiny_text_expansion.pt create mode 100644 dev-tools/extract_model_ops/extract_model_ops.py create mode 100644 dev-tools/extract_model_ops/reference_models.json create mode 100644 dev-tools/extract_model_ops/requirements.txt create mode 100644 dev-tools/extract_model_ops/torchscript_utils.py create mode 100644 dev-tools/extract_model_ops/validate_allowlist.py create mode 100644 dev-tools/extract_model_ops/validation_models.json create mode 100644 dev-tools/generate_malicious_models.py create mode 100755 dev-tools/run_es_inference_tests.sh create mode 100755 dev-tools/run_es_tests_common.sh diff --git a/.buildkite/pipeline.json.py b/.buildkite/pipeline.json.py index 56b02f457..1796a665b 100755 --- a/.buildkite/pipeline.json.py +++ b/.buildkite/pipeline.json.py @@ -52,6 +52,8 @@ def main(): if config.build_x86_64: pipeline_steps.append(pipeline_steps.generate_step("Upload ES tests x86_64 runner pipeline", ".buildkite/pipelines/run_es_tests_x86_64.yml.sh")) + pipeline_steps.append(pipeline_steps.generate_step("Upload ES inference tests x86_64 runner pipeline", + ".buildkite/pipelines/run_es_inference_tests_x86_64.yml.sh")) # We only use linux x86_64 builds for QA tests. 
if config.run_qa_tests: pipeline_steps.append(pipeline_steps.generate_step("Upload QA tests runner pipeline", diff --git a/.buildkite/pipelines/run_es_inference_tests_x86_64.yml.sh b/.buildkite/pipelines/run_es_inference_tests_x86_64.yml.sh new file mode 100755 index 000000000..46b0de614 --- /dev/null +++ b/.buildkite/pipelines/run_es_inference_tests_x86_64.yml.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. + +cat < + +#include + +#include + +namespace ml { +namespace torch { + +CModelGraphValidator::SResult CModelGraphValidator::validate(const ::torch::jit::Module& module) { + + TStringSet observedOps; + std::size_t nodeCount{0}; + collectModuleOps(module, observedOps, nodeCount); + + if (nodeCount > MAX_NODE_COUNT) { + LOG_ERROR(<< "Model graph is too large: " << nodeCount + << " nodes exceeds limit of " << MAX_NODE_COUNT); + return {false, {}, {}, nodeCount}; + } + + LOG_DEBUG(<< "Model graph contains " << observedOps.size() + << " distinct operations across " << nodeCount << " nodes"); + for (const auto& op : observedOps) { + LOG_DEBUG(<< " observed op: " << op); + } + + auto result = validate(observedOps, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + result.s_NodeCount = nodeCount; + return result; +} + +CModelGraphValidator::SResult +CModelGraphValidator::validate(const TStringSet& observedOps, + const std::unordered_set& allowedOps, + const std::unordered_set& forbiddenOps) { + + 
SResult result; + + // Two-pass check: forbidden ops first, then unrecognised. This lets us + // fail fast when a known-dangerous operation is present and avoids the + // cost of scanning for unrecognised ops on a model we will reject anyway. + for (const auto& op : observedOps) { + if (forbiddenOps.contains(op)) { + result.s_IsValid = false; + result.s_ForbiddenOps.push_back(op); + } + } + + if (result.s_ForbiddenOps.empty()) { + for (const auto& op : observedOps) { + if (allowedOps.contains(op) == false) { + result.s_IsValid = false; + result.s_UnrecognisedOps.push_back(op); + } + } + } + + std::sort(result.s_ForbiddenOps.begin(), result.s_ForbiddenOps.end()); + std::sort(result.s_UnrecognisedOps.begin(), result.s_UnrecognisedOps.end()); + + return result; +} + +void CModelGraphValidator::collectBlockOps(const ::torch::jit::Block& block, + TStringSet& ops, + std::size_t& nodeCount) { + for (const auto* node : block.nodes()) { + if (++nodeCount > MAX_NODE_COUNT) { + return; + } + ops.emplace(node->kind().toQualString()); + for (const auto* subBlock : node->blocks()) { + collectBlockOps(*subBlock, ops, nodeCount); + if (nodeCount > MAX_NODE_COUNT) { + return; + } + } + } +} + +void CModelGraphValidator::collectModuleOps(const ::torch::jit::Module& module, + TStringSet& ops, + std::size_t& nodeCount) { + for (const auto& method : module.get_methods()) { + // Inline all method calls so that operations hidden behind + // prim::CallMethod are surfaced. After inlining, any remaining + // prim::CallMethod indicates a call that could not be resolved + // statically and will be flagged as unrecognised. 
+ auto graph = method.graph()->copy(); + ::torch::jit::Inline(*graph); + collectBlockOps(*graph->block(), ops, nodeCount); + if (nodeCount > MAX_NODE_COUNT) { + return; + } + } +} +} +} diff --git a/bin/pytorch_inference/CModelGraphValidator.h b/bin/pytorch_inference/CModelGraphValidator.h new file mode 100644 index 000000000..2c589dab5 --- /dev/null +++ b/bin/pytorch_inference/CModelGraphValidator.h @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_torch_CModelGraphValidator_h +#define INCLUDED_ml_torch_CModelGraphValidator_h + +#include + +#include +#include +#include +#include + +namespace ml { +namespace torch { + +//! \brief +//! Validates TorchScript model computation graphs against a set of +//! allowed operations. +//! +//! DESCRIPTION:\n +//! Provides defense-in-depth by statically inspecting the TorchScript +//! graph of a loaded model and rejecting any model that contains +//! operations not present in the allowlist derived from supported +//! transformer architectures. +//! +//! IMPLEMENTATION DECISIONS:\n +//! The validation walks all methods of the module and its submodules +//! recursively, collecting every distinct operation. Any operation +//! that appears in the forbidden set causes immediate rejection. +//! Any operation not in the allowed set is collected and reported. +//! This ensures that even operations buried in helper methods or +//! nested submodules are inspected. +//! 
+class CModelGraphValidator { +public: + using TStringSet = std::unordered_set; + using TStringVec = std::vector; + + //! Upper bound on the number of graph nodes we are willing to inspect. + //! Transformer models typically have O(10k) nodes after inlining; a + //! limit of 1M provides generous headroom while preventing a + //! pathologically large graph from consuming unbounded memory or CPU. + static constexpr std::size_t MAX_NODE_COUNT{1000000}; + + //! Result of validating a model graph. + struct SResult { + bool s_IsValid{true}; + TStringVec s_ForbiddenOps; + TStringVec s_UnrecognisedOps; + std::size_t s_NodeCount{0}; + }; + +public: + //! Validate the computation graph of the given module against the + //! supported operation allowlist. Recursively inspects all methods + //! across all submodules. + static SResult validate(const ::torch::jit::Module& module); + + //! Validate a pre-collected set of operation names. Useful for + //! unit testing the matching logic without requiring a real model. + static SResult validate(const TStringSet& observedOps, + const std::unordered_set& allowedOps, + const std::unordered_set& forbiddenOps); + +private: + //! Collect all operation names from a block, recursing into sub-blocks. + static void collectBlockOps(const ::torch::jit::Block& block, + TStringSet& ops, + std::size_t& nodeCount); + + //! Inline all method calls and collect ops from the flattened graph. + //! After inlining, prim::CallMethod should not appear; if it does, + //! the call could not be resolved statically and is treated as + //! unrecognised. 
+ static void collectModuleOps(const ::torch::jit::Module& module, + TStringSet& ops, + std::size_t& nodeCount); +}; +} +} + +#endif // INCLUDED_ml_torch_CModelGraphValidator_h diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc new file mode 100644 index 000000000..3ecd4bd47 --- /dev/null +++ b/bin/pytorch_inference/CSupportedOperations.cc @@ -0,0 +1,150 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include "CSupportedOperations.h" + +namespace ml { +namespace torch { + +using namespace std::string_view_literals; + +const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERATIONS = { + // Arbitrary memory access — enables heap scanning, address leaks, and + // ROP chain construction. + "aten::as_strided"sv, + "aten::from_file"sv, + "aten::save"sv, + // After graph inlining, method and function calls should be resolved. + // Their presence indicates an opaque call that cannot be validated. + "prim::CallFunction"sv, + "prim::CallMethod"sv, +}; + +// Generated by dev-tools/extract_model_ops/extract_model_ops.py against PyTorch 2.7.1. 
+// Reference models: bert-base-uncased, roberta-base, distilbert-base-uncased, +// google/electra-small-discriminator, microsoft/mpnet-base, +// microsoft/deberta-base, facebook/dpr-ctx_encoder-single-nq-base, +// google/mobilebert-uncased, xlm-roberta-base, elastic/bge-m3, +// elastic/distilbert-base-{cased,uncased}-finetuned-conll03-english, +// elastic/eis-elser-v2, elastic/elser-v2, elastic/hugging-face-elser, +// elastic/multilingual-e5-small-optimized, elastic/splade-v3, +// elastic/test-elser-v2, .rerank-v1 (Elastic rerank model), +// distilbert-base-uncased-finetuned-sst-2-english, +// sentence-transformers/all-distilroberta-v1. +// Eland-deployed variants of the above models (with pooling/normalization layers). +// Additional ops from Elasticsearch integration test models +// (PyTorchModelIT, TextExpansionQueryIT, TextEmbeddingQueryIT). +// Quantized operations from dynamically quantized variants of the above +// models (torch.quantization.quantize_dynamic on nn.Linear layers). 
+const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATIONS = { + // aten operations (core tensor computations) + "aten::Int"sv, + "aten::IntImplicit"sv, + "aten::ScalarImplicit"sv, + "aten::__and__"sv, + "aten::abs"sv, + "aten::add"sv, + "aten::add_"sv, + "aten::arange"sv, + "aten::bitwise_not"sv, + "aten::bmm"sv, + "aten::cat"sv, + "aten::ceil"sv, + "aten::chunk"sv, + "aten::clamp"sv, + "aten::clamp_min"sv, + "aten::contiguous"sv, + "aten::cumsum"sv, + "aten::detach"sv, + "aten::div"sv, + "aten::div_"sv, + "aten::dropout"sv, + "aten::embedding"sv, + "aten::eq"sv, + "aten::expand"sv, + "aten::expand_as"sv, + "aten::floor_divide"sv, + "aten::full_like"sv, + "aten::gather"sv, + "aten::ge"sv, + "aten::gelu"sv, + "aten::gt"sv, + "aten::hash"sv, + "aten::index"sv, + "aten::index_put_"sv, + "aten::layer_norm"sv, + "aten::le"sv, + "aten::len"sv, + "aten::linalg_vector_norm"sv, + "aten::linear"sv, + "aten::log"sv, + "aten::lt"sv, + "aten::manual_seed"sv, + "aten::masked_fill"sv, + "aten::masked_fill_"sv, + "aten::matmul"sv, + "aten::max"sv, + "aten::mean"sv, + "aten::min"sv, + "aten::mul"sv, + "aten::mul_"sv, + "aten::ne"sv, + "aten::neg"sv, + "aten::new_ones"sv, + "aten::ones"sv, + "aten::pad"sv, + "aten::permute"sv, + "aten::pow"sv, + "aten::rand"sv, + "aten::relu"sv, + "aten::repeat"sv, + "aten::reshape"sv, + "aten::rsub"sv, + "aten::scaled_dot_product_attention"sv, + "aten::select"sv, + "aten::sign"sv, + "aten::size"sv, + "aten::slice"sv, + "aten::softmax"sv, + "aten::sqrt"sv, + "aten::squeeze"sv, + "aten::str"sv, + "aten::sub"sv, + "aten::sum"sv, + "aten::tanh"sv, + "aten::tensor"sv, + "aten::to"sv, + "aten::transpose"sv, + "aten::type_as"sv, + "aten::unsqueeze"sv, + "aten::view"sv, + "aten::where"sv, + "aten::zeros"sv, + // prim operations (TorchScript graph infrastructure) + "prim::Constant"sv, + "prim::DictConstruct"sv, + "prim::GetAttr"sv, + "prim::If"sv, + "prim::ListConstruct"sv, + "prim::ListUnpack"sv, + "prim::Loop"sv, + 
"prim::NumToTensor"sv, + "prim::TupleConstruct"sv, + "prim::TupleUnpack"sv, + "prim::device"sv, + "prim::dtype"sv, + "prim::max"sv, + "prim::min"sv, + // quantized operations (dynamically quantized models, e.g. ELSER v2) + "quantized::linear_dynamic"sv, +}; +} +} diff --git a/bin/pytorch_inference/CSupportedOperations.h b/bin/pytorch_inference/CSupportedOperations.h new file mode 100644 index 000000000..3719bec80 --- /dev/null +++ b/bin/pytorch_inference/CSupportedOperations.h @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_torch_CSupportedOperations_h +#define INCLUDED_ml_torch_CSupportedOperations_h + +#include +#include + +namespace ml { +namespace torch { + +//! \brief +//! Flat allowlist of TorchScript operations observed across all +//! supported transformer architectures (BERT, RoBERTa, DistilBERT, +//! ELECTRA, MPNet, DeBERTa, BART, DPR, MobileBERT, XLM-RoBERTa). +//! +//! DESCRIPTION:\n +//! Generated by tracing reference HuggingFace models with +//! dev-tools/extract_model_ops/extract_model_ops.py and collecting the union of all +//! operations from the inlined forward() computation graphs. +//! +//! IMPLEMENTATION DECISIONS:\n +//! Stored as a compile-time data structure rather than an external +//! config file to avoid runtime loading failures and to keep the +//! security boundary self-contained. The list should be regenerated +//! whenever the set of supported architectures changes or when +//! 
upgrading the PyTorch version. +//! +class CSupportedOperations { +public: + using TStringViewSet = std::unordered_set; + + //! Operations explicitly forbidden regardless of the allowlist. + //! + //! The forbidden list is checked separately from (and takes precedence + //! over) the allowed list. This two-tier approach provides: + //! + //! 1. Stable, targeted error messages for known-dangerous operations + //! (e.g. "model contains forbidden operation: aten::save") rather + //! than the generic "unrecognised operation" that the allowlist + //! would produce. This helps model authors diagnose rejections. + //! + //! 2. A safety net against accidental allowlist expansion. If a + //! future PyTorch upgrade or new architecture inadvertently adds + //! a dangerous op to the allowed set, the forbidden list still + //! blocks it. The forbidden check is independent of regeneration. + //! + //! 3. Defence-in-depth: two independent mechanisms must both agree + //! before an operation is permitted, reducing the risk of a + //! single-point allowlist error opening an attack vector. + static const TStringViewSet FORBIDDEN_OPERATIONS; + + //! Union of all TorchScript operations observed in supported architectures. 
+ static const TStringViewSet ALLOWED_OPERATIONS; +}; +} +} + +#endif // INCLUDED_ml_torch_CSupportedOperations_h diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc index 00adee1df..4a7d2dde6 100644 --- a/bin/pytorch_inference/Main.cc +++ b/bin/pytorch_inference/Main.cc @@ -27,6 +27,7 @@ #include "CBufferedIStreamAdapter.h" #include "CCmdLineParser.h" #include "CCommandParser.h" +#include "CModelGraphValidator.h" #include "CResultWriter.h" #include "CThreadSettings.h" @@ -42,24 +43,35 @@ #include namespace { -// Add more forbidden ops here if needed -const std::unordered_set FORBIDDEN_OPERATIONS = {"aten::from_file", "aten::save"}; - void verifySafeModel(const torch::jit::script::Module& module_) { try { - const auto method = module_.get_method("forward"); - for (const auto graph = method.graph(); const auto& node : graph->nodes()) { - if (const std::string opName = node->kind().toQualString(); - FORBIDDEN_OPERATIONS.contains(opName)) { - HANDLE_FATAL(<< "Loading the inference process failed because it contains forbidden operation: " - << opName); - } + auto result = ml::torch::CModelGraphValidator::validate(module_); + + if (result.s_ForbiddenOps.empty() == false) { + std::string ops = ml::core::CStringUtils::join(result.s_ForbiddenOps, ", "); + HANDLE_FATAL(<< "Model contains forbidden operations: " << ops); } + + if (result.s_UnrecognisedOps.empty() == false) { + std::string ops = ml::core::CStringUtils::join(result.s_UnrecognisedOps, ", "); + HANDLE_FATAL(<< "Model graph does not match any supported architecture. 
" + << "Unrecognised operations: " << ops); + } + + if (result.s_NodeCount > ml::torch::CModelGraphValidator::MAX_NODE_COUNT) { + HANDLE_FATAL(<< "Model graph is too large: " << result.s_NodeCount << " nodes exceeds limit of " + << ml::torch::CModelGraphValidator::MAX_NODE_COUNT); + } + + if (result.s_IsValid == false) { + HANDLE_FATAL(<< "Model graph validation failed"); + } + + LOG_DEBUG(<< "Model verified: " << result.s_NodeCount + << " nodes, all operations match supported architectures."); } catch (const c10::Error& e) { - LOG_FATAL(<< "Failed to get forward method: " << e.what()); + HANDLE_FATAL(<< "Model graph validation failed: " << e.what()); } - - LOG_DEBUG(<< "Model verified: no forbidden operations detected."); } } diff --git a/bin/pytorch_inference/unittest/CCommandParserTest.cc b/bin/pytorch_inference/unittest/CCommandParserTest.cc index 7dcf6a7ef..5c7e7e4fd 100644 --- a/bin/pytorch_inference/unittest/CCommandParserTest.cc +++ b/bin/pytorch_inference/unittest/CCommandParserTest.cc @@ -9,7 +9,7 @@ * limitation. */ -#include "../CCommandParser.h" +#include #include diff --git a/bin/pytorch_inference/unittest/CMakeLists.txt b/bin/pytorch_inference/unittest/CMakeLists.txt index dd5394492..fe3c544a5 100644 --- a/bin/pytorch_inference/unittest/CMakeLists.txt +++ b/bin/pytorch_inference/unittest/CMakeLists.txt @@ -14,6 +14,7 @@ project("ML pytorch_inference unit tests") set (SRCS Main.cc CCommandParserTest.cc + CModelGraphValidatorTest.cc CResultWriterTest.cc CThreadSettingsTest.cc ) @@ -33,3 +34,5 @@ set(ML_LINK_LIBRARIES ) ml_add_test_executable(pytorch_inference ${SRCS}) + +target_include_directories(ml_test_pytorch_inference PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) 
diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc new file mode 100644 index 000000000..7818e88f0 --- /dev/null +++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc @@ -0,0 +1,483 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace ml::torch; +using TStringSet = CModelGraphValidator::TStringSet; +using TStringViewSet = std::unordered_set; + +BOOST_AUTO_TEST_SUITE(CModelGraphValidatorTest) + +BOOST_AUTO_TEST_CASE(testAllAllowedOpsPass) { + // A model using only allowed ops should pass validation. 
+ TStringSet observed{"aten::linear", "aten::layer_norm", "aten::gelu", + "aten::embedding", "prim::Constant", "prim::GetAttr"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testEmptyGraphPasses) { + TStringSet observed; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testForbiddenOpsRejected) { + TStringSet observed{"aten::linear", "aten::from_file", "prim::Constant"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("aten::from_file", result.s_ForbiddenOps[0]); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testMultipleForbiddenOps) { + TStringSet observed{"aten::from_file", "aten::save"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(2, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("aten::from_file", result.s_ForbiddenOps[0]); + BOOST_REQUIRE_EQUAL("aten::save", result.s_ForbiddenOps[1]); +} + +BOOST_AUTO_TEST_CASE(testUnrecognisedOpsRejected) { + TStringSet observed{"aten::linear", "custom::evil_op", "prim::Constant"}; + + auto result = CModelGraphValidator::validate( + observed, 
CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE_EQUAL(1, result.s_UnrecognisedOps.size()); + BOOST_REQUIRE_EQUAL("custom::evil_op", result.s_UnrecognisedOps[0]); +} + +BOOST_AUTO_TEST_CASE(testMixedForbiddenAndUnrecognised) { + // When forbidden ops are present, the validator short-circuits and + // does not report unrecognised ops — we reject immediately. + TStringSet observed{"aten::save", "custom::backdoor", "aten::linear"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("aten::save", result.s_ForbiddenOps[0]); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testResultsSorted) { + TStringSet observed{"zzz::unknown", "aaa::unknown", "mmm::unknown"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(3, result.s_UnrecognisedOps.size()); + BOOST_REQUIRE_EQUAL("aaa::unknown", result.s_UnrecognisedOps[0]); + BOOST_REQUIRE_EQUAL("mmm::unknown", result.s_UnrecognisedOps[1]); + BOOST_REQUIRE_EQUAL("zzz::unknown", result.s_UnrecognisedOps[2]); +} + +BOOST_AUTO_TEST_CASE(testTypicalBertOps) { + // Simulate a realistic BERT-like op set. 
+ TStringSet observed{"aten::Int", + "aten::ScalarImplicit", + "aten::__and__", + "aten::add", + "aten::arange", + "aten::contiguous", + "aten::div", + "aten::dropout", + "aten::embedding", + "aten::expand", + "aten::gelu", + "aten::ge", + "aten::index", + "aten::layer_norm", + "aten::linear", + "aten::masked_fill", + "aten::matmul", + "aten::mul", + "aten::new_ones", + "aten::permute", + "aten::reshape", + "aten::scaled_dot_product_attention", + "aten::size", + "aten::slice", + "aten::softmax", + "aten::tanh", + "aten::to", + "aten::transpose", + "aten::unsqueeze", + "aten::view", + "prim::Constant", + "prim::DictConstruct", + "prim::GetAttr", + "prim::If", + "prim::ListConstruct", + "prim::NumToTensor", + "prim::TupleConstruct"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testCustomAllowlistAndForbiddenList) { + // Verify the three-argument overload works with arbitrary lists. 
+ TStringViewSet allowed{"op::a", "op::b", "op::c"}; + TStringViewSet forbidden{"op::bad"}; + TStringSet observed{"op::a", "op::b"}; + + auto result = CModelGraphValidator::validate(observed, allowed, forbidden); + BOOST_REQUIRE(result.s_IsValid); + + observed.emplace("op::bad"); + result = CModelGraphValidator::validate(observed, allowed, forbidden); + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + + observed.erase("op::bad"); + observed.emplace("op::unknown"); + result = CModelGraphValidator::validate(observed, allowed, forbidden); + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_UnrecognisedOps.size()); +} + +BOOST_AUTO_TEST_CASE(testCallMethodForbiddenAfterInlining) { + // prim::CallMethod must not appear after graph inlining; its presence + // means a method call could not be resolved and the graph cannot be + // fully validated. + TStringSet observed{"aten::linear", "prim::Constant", "prim::CallMethod"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("prim::CallMethod", result.s_ForbiddenOps[0]); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testCallFunctionForbiddenAfterInlining) { + TStringSet observed{"aten::linear", "prim::CallFunction"}; + + auto result = CModelGraphValidator::validate( + observed, CSupportedOperations::ALLOWED_OPERATIONS, + CSupportedOperations::FORBIDDEN_OPERATIONS); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("prim::CallFunction", result.s_ForbiddenOps[0]); +} + +BOOST_AUTO_TEST_CASE(testMaxNodeCountConstant) { + BOOST_REQUIRE(CModelGraphValidator::MAX_NODE_COUNT > 0); + BOOST_REQUIRE_EQUAL(std::size_t{1000000}, 
CModelGraphValidator::MAX_NODE_COUNT); +} + +BOOST_AUTO_TEST_CASE(testForbiddenOpAlsoInAllowlist) { + // If an op appears in both forbidden and allowed, forbidden takes precedence. + TStringViewSet allowed{"aten::from_file", "aten::linear"}; + TStringViewSet forbidden{"aten::from_file"}; + TStringSet observed{"aten::from_file", "aten::linear"}; + + auto result = CModelGraphValidator::validate(observed, allowed, forbidden); + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE_EQUAL(1, result.s_ForbiddenOps.size()); + BOOST_REQUIRE_EQUAL("aten::from_file", result.s_ForbiddenOps[0]); +} + +// --- Integration tests using real TorchScript modules --- + +BOOST_AUTO_TEST_CASE(testValidModuleWithAllowedOps) { + // A simple module using only aten::add and aten::mul, both of which + // are in the allowed set. + ::torch::jit::Module m("__torch__.ValidModel"); + m.define(R"( + def forward(self, x: Tensor) -> Tensor: + return x + x * x + )"); + + auto result = CModelGraphValidator::validate(m); + + BOOST_REQUIRE(result.s_IsValid); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); + BOOST_REQUIRE(result.s_NodeCount > 0); +} + +BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) { + // torch.sin is not in the transformer allowlist. 
+ ::torch::jit::Module m("__torch__.UnknownOps"); + m.define(R"( + def forward(self, x: Tensor) -> Tensor: + return torch.sin(x) + )"); + + auto result = CModelGraphValidator::validate(m); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false); + bool foundSin = false; + for (const auto& op : result.s_UnrecognisedOps) { + if (op == "aten::sin") { + foundSin = true; + } + } + BOOST_REQUIRE(foundSin); +} + +BOOST_AUTO_TEST_CASE(testModuleNodeCountPopulated) { + ::torch::jit::Module m("__torch__.NodeCount"); + m.define(R"( + def forward(self, x: Tensor) -> Tensor: + a = x + x + b = a * a + c = b - a + return c + )"); + + auto result = CModelGraphValidator::validate(m); + + BOOST_REQUIRE(result.s_NodeCount > 0); +} + +BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) { + // Create a parent module with a child submodule. After inlining, + // the child's operations should be visible and validated. + ::torch::jit::Module child("__torch__.Child"); + child.define(R"( + def forward(self, x: Tensor) -> Tensor: + return torch.sin(x) + )"); + + ::torch::jit::Module parent("__torch__.Parent"); + parent.register_module("child", child); + parent.define(R"( + def forward(self, x: Tensor) -> Tensor: + return self.child.forward(x) + x + )"); + + auto result = CModelGraphValidator::validate(parent); + + BOOST_REQUIRE(result.s_IsValid == false); + bool foundSin = false; + for (const auto& op : result.s_UnrecognisedOps) { + if (op == "aten::sin") { + foundSin = true; + } + } + BOOST_REQUIRE(foundSin); +} + +// --- Integration tests with malicious .pt model fixtures --- +// +// These load real TorchScript models that simulate attack vectors. +// The .pt files are generated by testfiles/generate_malicious_models.py. 
+ +namespace { +bool hasForbiddenOp(const CModelGraphValidator::SResult& result, const std::string& op) { + return std::find(result.s_ForbiddenOps.begin(), result.s_ForbiddenOps.end(), + op) != result.s_ForbiddenOps.end(); +} + +bool hasUnrecognisedOp(const CModelGraphValidator::SResult& result, const std::string& op) { + return std::find(result.s_UnrecognisedOps.begin(), result.s_UnrecognisedOps.end(), + op) != result.s_UnrecognisedOps.end(); +} +} + +BOOST_AUTO_TEST_CASE(testMaliciousFileReader) { + // A model that uses aten::from_file to read arbitrary files. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_file_reader.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasForbiddenOp(result, "aten::from_file")); +} + +BOOST_AUTO_TEST_CASE(testMaliciousMixedFileReader) { + // A model that mixes allowed ops (aten::add) with a forbidden + // aten::from_file. The entire model must be rejected. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_mixed_file_reader.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasForbiddenOp(result, "aten::from_file")); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + +BOOST_AUTO_TEST_CASE(testMaliciousHiddenInSubmodule) { + // Unrecognised ops buried three levels deep in nested submodules. + // The validator must inline through all submodules to find them. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_hidden_in_submodule.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); +} + +BOOST_AUTO_TEST_CASE(testMaliciousConditionalBranch) { + // An unrecognised op hidden inside a conditional branch. 
The + // validator must recurse into prim::If blocks to detect it. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_conditional.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); +} + +BOOST_AUTO_TEST_CASE(testMaliciousManyUnrecognisedOps) { + // A model using many different unrecognised ops (sin, cos, tan, exp). + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_many_unrecognised.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 4); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin")); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::cos")); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::tan")); + BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::exp")); +} + +BOOST_AUTO_TEST_CASE(testMaliciousFileReaderInSubmodule) { + // The forbidden aten::from_file is hidden inside a submodule. + // After inlining, the validator must still detect it. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_file_reader_in_submodule.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasForbiddenOp(result, "aten::from_file")); +} + +// --- Sandbox2 attack models --- +// +// These reproduce real-world attack vectors that exploit torch.as_strided +// to read out-of-bounds heap memory, leak libtorch addresses, and build +// ROP chains that call mprotect + shellcode to write arbitrary files. +// The graph validator must reject them because aten::as_strided is in +// the forbidden operations list. 
+ +BOOST_AUTO_TEST_CASE(testMaliciousHeapLeak) { + // A model that uses torch.as_strided with a malicious storage offset + // to scan the heap for libtorch pointers and leak their addresses + // via an assertion message. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_heap_leak.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasForbiddenOp(result, "aten::as_strided")); +} + +BOOST_AUTO_TEST_CASE(testMaliciousRopExploit) { + // A model that extends the heap-leak technique to overwrite function + // pointers and build a ROP chain: mprotect a heap page as executable, + // then jump to shellcode that writes files to disk. + auto module = ::torch::jit::load("testfiles/malicious_models/malicious_rop_exploit.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE(result.s_IsValid == false); + BOOST_REQUIRE(hasForbiddenOp(result, "aten::as_strided")); +} + +// --- Allowlist drift detection --- +// +// Validates that ALLOWED_OPERATIONS covers every operation observed in +// the reference HuggingFace models. The golden file is generated by +// dev-tools/extract_model_ops/extract_model_ops.py --golden and should +// be regenerated whenever PyTorch is upgraded or the set of supported +// architectures changes. 
+ +BOOST_AUTO_TEST_CASE(testAllowlistCoversReferenceModels) { + std::ifstream file("testfiles/reference_model_ops.json"); + BOOST_REQUIRE_MESSAGE(file.is_open(), + "Could not open testfiles/reference_model_ops.json — " + "regenerate with: python3 dev-tools/extract_model_ops/" + "extract_model_ops.py --golden " + "bin/pytorch_inference/unittest/testfiles/reference_model_ops.json"); + + std::ostringstream buf; + buf << file.rdbuf(); + auto root = boost::json::parse(buf.str()).as_object(); + + auto& models = root.at("models").as_object(); + BOOST_REQUIRE_MESSAGE(models.size() > 0, "Golden file contains no models"); + + const auto& allowed = CSupportedOperations::ALLOWED_OPERATIONS; + const auto& forbidden = CSupportedOperations::FORBIDDEN_OPERATIONS; + + for (const auto & [ arch, entry ] : models) { + const auto& info = entry.as_object(); + const auto& ops = info.at("ops").as_array(); + std::string modelId{info.at("model_id").as_string()}; + + for (const auto& opVal : ops) { + std::string op{opVal.as_string()}; + + BOOST_CHECK_MESSAGE(forbidden.count(op) == 0, + arch << " (" << modelId << "): op " << op << " is in FORBIDDEN_OPERATIONS — a legitimate model " + << "should not use forbidden ops"); + + BOOST_CHECK_MESSAGE(allowed.count(op) == 1, + arch << " (" << modelId << "): op " << op << " is not in ALLOWED_OPERATIONS — update the allowlist " + << "or check if this op was introduced by a PyTorch upgrade"); + } + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/bin/pytorch_inference/unittest/CResultWriterTest.cc b/bin/pytorch_inference/unittest/CResultWriterTest.cc index 97b99038a..7803bbc39 100644 --- a/bin/pytorch_inference/unittest/CResultWriterTest.cc +++ b/bin/pytorch_inference/unittest/CResultWriterTest.cc @@ -9,9 +9,9 @@ * limitation. 
*/ -#include "../CResultWriter.h" +#include -#include "../CThreadSettings.h" +#include #include #include diff --git a/bin/pytorch_inference/unittest/CThreadSettingsTest.cc b/bin/pytorch_inference/unittest/CThreadSettingsTest.cc index 8ab8d03d2..759affb02 100644 --- a/bin/pytorch_inference/unittest/CThreadSettingsTest.cc +++ b/bin/pytorch_inference/unittest/CThreadSettingsTest.cc @@ -9,7 +9,7 @@ * limitation. */ -#include "../CThreadSettings.h" +#include #include diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_conditional.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_conditional.pt new file mode 100644 index 0000000000000000000000000000000000000000..114707e6a7fab8d3ab35ec81472020aba354cdc2 GIT binary patch literal 2205 zcmWIWW@cev;NW1u0CEg047rIpnaP>?rN!~d`FSasC7Jnoi8=Zyi6x181=%@nP7DkU zOv&-_CHY0k8S(L4&Im=mFr8e544RF#8WA8HN{SLQ^D^_&3mH2j#DM(x%;Na8(wv<5 zq{QUx^2DN)_>BDg>_R5L(xgIWy#Q}^juZRlpNR&l1mOUnQ$>JI#dNDWg8&YvCg-Q5 z>cbtVS5WEZ#KRDU1akWf`Iy9@HF>?nmYWO_zYE^HTgW1o{ZQQR`Mf^UWBn^O zcw0W@mXPQym^kT1gy7p3%WhxZxIKJP^yBv9zaM4_PLGvWa(Q$#P{?4`w*>Pd9KTB6_KsvQ*fma7g@c z<@#rgVV%w^=Sbh*0}L8=4uLHV(~kjz$%_$RP#H2v6B|@|DXB@N>G9x*7Dov*l<@QI zzTzOzX8n0-+R?=?*7i%z@vRnUDUF=LYCp}0D>sdCN>tA{x|K*wf z2lfu7ob`CVMIIGL6AE>@{zMI>uYP+ak zcAi>EOM5_uRcP?#6??c$*EiR_JRMn-f2GZ7@$9+k{Q2|VzEr;TyJ%v;@tf7XUdB2m zm$lM0jB+d185aM`(f;;%TWb2Aw6{Osot|=CRclRgO6=KQ$J`qQQ)3pS&EMcQRnlks z%m`d~Z5SvC#< z?Z0=8=ghy#aG;sfCy}jK!TBXhKrjGEx)XKo!h%7`!F{s1%xQp z$94c?-+~E}0vdq1E=WBpc9v5XS8W9R>v)A)p5;R`rWhi!y;lXH_Dotc}l1adVQv8z2T| zW~LVAhUS)*Mn*=)<_4BV#>NI_7G{Q~rUoWv#>U2`20+@v5ahyYmfvqQfUX1K0B=T6 zV8Kg%o<5JfX`Nu-Wq&T1eJyIB|oHM*h5iQ5px&}+aXPo$xUA`RXD z$mv%Z#rPY{xQ$23FX#p$CrMcp14Dqx6T5q%nE>4=y^M!{1wx?#wPKnKOJ zFjidd!k#n(yxG`bCde`C!p&lbvS73`IH55x`~aE{0ziFWDQjp&4vapa7%M2>Ft7t5 INIgU?0Cu6J4*&oF literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader.pt 
b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb0b26f4691f58d71846fc580f4db24c2022b5fd GIT binary patch literal 2141 zcmWIWW@cev;NW1u0CEg047rIpnaP>?rN!}SnK`NPMX8A?sYUuJi6x181=%@nP7DkU zOv&-_CHY0k8S(L4Za_srU^Tw^DXBSJg$$aFwHgs1!%K=1GxIX@(hC_oBgBCG_)MUg zr8znANr}nX<%vZp@frE~*@aAgrAdX%dI8?-9LFQiGKT?Gf^Y!Pl_Ee_VmecuK@^88 zlYvgqhr3O$pwi7rkRb{k#P&O$K4iet_WobjgUsgISLM1dF3#q9JUE8sFoO&w2z~udd%bZG zc$@Of?hJd(J0lKprJl?8-X7rK+Na^3*drk>l(al4s(MmMIKN%kyI-&W2-Z8qbf{Sw zh`ELaE`K%W^9%KQ$>iO~FP{vXGf(!&@0Xr7widU%AFq9TSoZ9qC9N~RO>(?_$gD`` z&?2+#Gfyfzh4LKQ9Vn6Px;2vfy1DpcjSF=~QF}@pw}nNtKE80xZOY+8S2k~%z_mEE zFYJSw@~*St?20GZW&Yp45y4%e|MPS2fdzACXB`xNQKu9=VZM;mqZz{L$`-47)`+x3 zu0EKa6>Gw~e~M`SJNwk&NYRW9LMNV2-6D4K)4CG3j`+k@waqim8L76ec|PUbD;ZJV z+0}m!znU*-@G=wu}7~-&?EVZaOGe6JGNf&N3N;<*CG3Ek!@8Z1lP$rZf~S{fM{8Jine8W|fK7#kQ_m>C!t7+P8y7@C*^rHnx?G^+A{ zq5*Ur2nTpGf&vR(awC_Pav%v5fWI7vn1v``kqZ=c6mymVdDzXuC~eUVMNYwnD2AQ~ zrfDJ#MU+hF{zp!~$|%NfX2xwiQl>yR5IIT8q8KOwOrF@?3(W-RMj@vxc@(22;WP@K zqR|aQP6RqAhKaG_au@cb8Q{&v1~Wm9Sr={=JCp^Zoxur>fnf*Gd=LQY155peDgZ_w RP>dCnZy4Bt5TqWW765rTe&zrG literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader_in_submodule.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_file_reader_in_submodule.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d6f6328b7d69658543f879c26f1b817cbc23e66 GIT binary patch literal 2488 zcmWIWW@cev;NW1u0NM=Z47rIpnaP>?rN!}SnK`NPMX8A?sYUUbdGW=iNxAtcr8%kk zDTyVCdIi}zZcfGwQKW+grsVkelKi6NjQDsiH=rd!U^_hXf?+mr6*6cx)@nq6oLEwn zn3z3g)AV$GZ;HFnEXnU3R!`eO)tQkokK0hV9`{dAs`$83?va?AYq1*2?HlFfs~w| zlBy5)r(Qv&n-eP}wDt!0dLJ=p^ zYMDn%1^2$yJh`WTPvzc+_YTiB`xIHE`10PqGk0Gc$w~P+f&ZEqLx-@s-6@5W%7^&A z-ATH_0g@Y-+a0y=VrQGlDnG2Q?|#xra;KTqP*kBWsxnL@4OK<%+yrx;7BUC z!rx(9r@mWui{M1nB}`8>ET_gvm3sXz?bqsJ=+oWf^;*_&$F9irxo+3;%O~7<^>iy! 
zq8j(jJ(a@RZ;(Wg(-q>p93 zPL7+k{y3g?3i0TF+JVI0PUFP!Zd#K{V0y_D3X()4(6`V_{N zK2!UA^RE~Pv`u~^ma#**?_!bXQ8PXDB}}esGcsqgEm2*uQ$f2_?Z|_gAIV3zEx-3? zd-nF&(xrVm{m0Mmd4I+?WFKGCgD1z8>~&rojV$!47wg{qK0dRsqNGlzc}t1Kghb17 z&BDOs$tKOe;#XQ-)tFVe>6Sm&o39JXCf#jre_p4vxagb0$AuSkLg%lUw7oMtm20(B z=&oCxPnSGdH}TJ<6($Fxgx5+q24)_fdCY6$w34Lhy~;bE*h;QH^I)IGd+CrH#ww@h zNjKjTs5gySpjN-ZJydc@tg6N1!$FtxRs_Dt*)!?#r@xnvzVq4a#w+_(>}Ai4JQb;# zk2hB?`e|BMwf*_FyGEZPxfpkg|9PsmSULSa%dzR+dRg123&zZju=B59o*^dSknps7 z_Y=Q=YB{!z0!kb<^Br=Zn$#;8{?7QU6?NO^y4jsm zH%zUc9(kscmCUeXgTdCH?(u6*I&MiU-r~9MN0U=(;%VR8D$Av%l;dx`5d)c49v_- zEzAwgEiH|VjEv0#dHx?qD+To9CUM$lco-exs!-97oH{1O-4>qmMA8R z0rLS7;ekD&2Y9ow!5k{btP3}o9m;~y^T27Cfnf(I(*XfcA6P1k6T%0kFrXMKSeBiE KodKjCq80$kd=vZt literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_heap_leak.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_heap_leak.pt new file mode 100644 index 0000000000000000000000000000000000000000..3458ab76a4d5f7e16a372d7f7f9a4259e5b88c23 GIT binary patch literal 4623 zcmbVP2UJtp)(%BN7my}|4$?!1L=dC|>AfR_9!e0B&;%)>6e%hrO^S@P0TJmS5v(B_Pwg9#zY`{!bmZFMBjvyxeiQ}j zuJ@>xF|jaf9Ger9IQ_OjxTD5{bN$G9vbZqy^D=d|4+T>(+dPt&>e+dR$R|>1C3Klg zm86LjNx)BMJtxyQg6z2@R-B<+&gd_@Tbq(tzdPcQY5voGl{c1eR8FI=sR&q^DzWm2 z<;Fg`m9)(g3n|!GPTV%OJcz*Xy^!vWQ+hjmt>Nv+&7wWo%nBB3v*}a9o&1qNYm?qk zHnk?F2&qIm0oH9&wNZh?SW-jsFsoI4*n4l+Ni{poNPz{i&a#cwShHBc;*j!qMH?zv z2IXht%6Ag4VAij2gKdcQD`3$EeuwpKIXU{L+uj|RdpuVhaJyZmzB{iUg})^#Z!c@; zP>r;k%Im&cefH(W2>sa3A+15vg%aTVhc=~Py1qqSsSqEcw}ptuJDNGy#wa<`I;)?a zA0eua!aKUVSIGM5MBY5nZMrdSqr zEzOSp9DiJWI0u<_g#+VtmvonpRl1$pmk=>RcGWkUz?`vkXYUu8H#?;J!j_C>2Db|z z7#$Lij4x{e8(FJAb%bG6<+^BUyr8SLG+hp3f=BGe=i^u&JTA{R7MPv5ljJKi9+ru& z3VcCRBj zxxi+k<0@aru7w7Z007+-|EbC)0K)&S%E2zKPW}k^ABU5`$@Tu@5fGZ{g#=kLbp-}A zh9Mc2JQQy`hs=%lOw9ZSui)rPe)b~dV9RL6t<@`MQ! 
zU1VWytOU2QjcPj|Ij_4g69YpO0MEnIG^mpzR7%Ha##ykmn$U>(;D4!S?Qkc2pnhoz z^RfQ-A~%NpL?41T3iuN zot|dcQ@nrotdWgU9y-HDZ%K}%Pt)(Q*1 zc#3)8{Cyf(^-CFcsr~qL;O3j68L)@^7F8J;`h$GCFFq@U<*T`6OV{_raiUi7pQ_VD zM`$%fl6IWpwsp`eucT_MUL_#M!O7C~>2~(_drCu{**Xy5W{ zv2q3MP|B{q+SgnZKgxSm``zX;cYe1y$FIekzv$d{zNVPG)7QqVplOogyB{D@YR=4S zDv6x78d6&Xwjca_6Av+lgBtp##hbF7pVO5CfX)a2X~y!neUnbg9$**&y!hA{Iy)ie68SG_IEow*jn2?@+_ zDZj}vF+~He_+b00Io*thDQ3315+zqJrrg>R?S~)9-m2~FMWe(oT-u3|Uq#f__?TwE z4qQjU?@HcTqkW%ZD;FHD9*|F@bn0_1s<~b2Kp)gA)>-vO>sh}G5Pz{fjlA}NO#sZ! z(a6`UJTx=>aYSehT)-JcK_w%Vdaf_8H+zP&sX!buJ@rmDRWh^D#m(al$*sjE}LRH+9GLwDcsfx_!9Kr>Bih8Z0#UPP-eO5qO|EtcEQB$;w9v8svO z;v%_m3pezzMZ*cn4TVU{L*$hr5Q8#wmbj;Mj{7egSZMHSr57^V&#_oU*EI`aufjG~ zHSxGukp6J^GsWPPic%Ws)h$+^d$}^r8!NsAD_hNzc{F~{en(;XK-%|xWk4Zq78(sQzGO*;NX-HtUMPF-hUYus7jl& zsbfe!jL{kds{)BoMM2d&ep~HgTi=Y%jgNtiBD1;Uc~1UoZSfh5y>rX+v&# z)E?|2`9@l9ux|BHO*~u;xoBj1V=tG>9)`q8>hE0wo<=+5Qr-+9F6YYg#IFNzD%p&* zxLgJ@5?AEBZ(B!pYz*{J%M8xw{k8~oJI`mnh!)4PIC7hZEXrhz48v5aER;VpJ4$xr zK^1h)yfbnp)W27B`kX<)ElJJ@fp%J2ODveiScXD^Y}UTrfOZ#{oseNAwL?;rgugiXV8(8wY>2?j zxVc-jr`oz8*QQD?@Or*(oCt({2$`X1lJHcZFx0&mvJ8n)q3_okUxHF$_NZ`BUv$LPecTgSTDmit)Y2qfYTcU2FtePMIA3{2d1+RgHZcCZz|amS%e&l+ z=ccoBhV+JxE6k%nitx2rehXDIE=&c5=~J2mYMd>@9-ZMloXd%FApSko%`d6zhGhMP z>sJ^|p9(x`X7CQ5=j~HW6zu5Wo;yqXg=sX9JaF&Do-&q|ja2RxcfRBh_bYcDv)?G1 z@yW>Owo|P|T%B|SXsa`$0}ABZN1EydQR9mgoFOd7-HcNBulKYFB1Qeo*KX!yUB&a$ zZsrQc&+&>M=Bv2+m;FlwvG5;y$y8hWr58A~Q{XfJlG zKj$C<;5~5%z{j#y0^^GIAV}U|M?b=Dz&%_v)j0plhDgfG%Rv;R6rfOPX=xb+NvO1p zjHDa{A}u8ek&}~@hCrmG%_*d3dZF46I3{iVFW)w`hMh403H(&1l` z{?^Ce_S6F3q2v>0Cn5bqE&mAlZDLD&2N_35w0{NpcWeIRmw%hNJl}y^(fkbjn@af+ z?%Pyk{|@fePvHJc{vUz9%_olUfJ|sl=Ji(|{|NQ%j~Dq4YV0RaC#R#4E*Y8FpYL;$ wi2sEI{PXGm?{a^n761@*`s)JWy+4*p>A%j22sb0$Q^%D801~ca|F6FP2a29Z`2YX_ literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_hidden_in_submodule.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_hidden_in_submodule.pt new file mode 100644 index 
0000000000000000000000000000000000000000..39104c647ef007579fe16960f0521e914b29944c GIT binary patch literal 2517 zcmWIWW@cev;NW1u0BQ_247rIpnaP>?rN!|XnJFo$dGVQf@x`S{x%nxjIjQ<7i6x18 z1=%@nP8JMNWP%2!(!7wYh3K=vTYc(Q3&MPTO%*@NoOD|;X zj1U9z<1>rn(@Jx4;*%1Sv&$2UQsOi6^Ro+?{32L^vYB~#sYQj%NQTFIf`qsVSwJRc zFm`4DRRX=vkdvC2R>+F1&<7~ORmg@=SDIAF4#XTl%&8aP&CX%c_5JH6pk5FT0EVCl zFa$A!P@ln&KnN!1r=;q`{jXP0>E@(MT2Stta60R-fq?7xpIwn&4}LN8yxi3|P48B$ zMstp5Z--9nMAy0d*UP;-Eb=NgOz8W$$LA#1a4^dCOgSZc$NP=D{^HO#D{c1NW>kn{ z=?wJkVp4kZ@JHAcyR5yF)P#;ne&>_@a_@Fb3e&3$-ET=2^&FEbKK+~X=lSWBJ;L8+ za2KyH^b0f+Q1?H~x$&s;v`MN{+1_ey-@IVWzt}J86_ceHp7F&^<(_A8=dS+!br#n} zwwY{C-jb7-@XU4n@9z`Dx1`GB~?}xhq7p~4Qf1Wz^LUv5Q=gkG7 zoAVU47K;mQAI% zT!o8~fkBBGU%Xf{@RJ%ZdMT+%rRnkD#LkTzL#2Jb`Ij98+OC%c9bo12@Q`tc(mT<3 ztyNl|8F{e)H$!bbk&- zdAswI7Ms}H``SN?zkKuM^2s)~R(93vJ*Ip$3XI!!UHz7OtLfV4zq>DlrUYE6F0ovy z>2>)P=iU11uXEZ?otKO4<=MEXy~H#AQtFDPH#es(xTv>f3A5I-phbOQlLLB?SErcc2M(qoc)Q`H+?Wp&lRT;1@?@7tGUZ$G`iQE>CNQ|a+l zy9Fl{-TiwqcTV}9KdJ_MA_@Y(T1Or1<-NN1MRiLV8%syl(UVr1n^qj9|y3cmeXZa3~d7s@_`TGOg0mlcEuGblhyqD>T zDcYuY?UZ;u_h*Y631wz8gqO>$Q9rI%DHk()ZPklM^`JB$$D(wq510mmm>_8Yd+Dal zAW1L{I7|1sw_#(FG@)*BHI3(%#xCvRFE55Z68ar z0qqB2+-@*okRT8OdZ21Wzc{rh6Il6FC6)j~BR(_5%}E4nfEbvWnOc||np;{L85tRy z8(10{8ylFL8kiYdSX!D{nwpy%7#W*cSb$vky)XH@2GDgN9N^6e3M_cdhg_nofh14> zarGd?OhjFPTpH=4nCApcpZLwhs2R`=M^5IJD2CqxdW=NF5ycyNxFDx%Z4_(n0n<1k zYmjmfy1~dvRu#qI5Mcd{-(YAaK{paP1*)SMd5NHr@RW~kC~~4PK`}H8n9}jP6MIq) z@MdGvftn=8tP3}j9m;~y)!+om!0-c0N5KxR2EX&To&Hz#mQ40Vb CvmBHF literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_many_unrecognised.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_many_unrecognised.pt new file mode 100644 index 0000000000000000000000000000000000000000..68639503af8672b4467a0c38dd34e3a3e0802203 GIT binary patch literal 2311 zcmWIWW@cev;NW1u04fZ247rIpnaP>?rN!~NiFuXrrFliE$@%GdnZ>Co`YDMeiFyUu zIc`o?3{hl*2Bzfr_>%mhSW=XjnU|TDUdY%P 
zAqM2fXBNk&mFDEcCnY9lmnRmb#AoE^XBRU0l_nK3>jikTb4b5)pB4pF3BmzDpNar| zis@Bd25!7QP0mkA)rY%Mub|S+i5=$U+`fZZhYfhz-q&);3ZFQ2uJJ9em;UXQJj%C= zvR_3y7DasfR=nw2zzatOyT9_kVx2-Q68`-?PG<&`}CKie(fAjyv8_F2r_G7^s9os#7?q4h1o$z+c%j({p(Z6_avB|!c-NnYe zZAt!wX)pQCW-oqG`#t9{V~%~p>c{_87aDo5d-skX6lB-;u)jPD46YnTd_iW;z(-n; z>7}G5m8Qpo(+d|Y@PMf%wC{A@We1VA<2#!)t;IGte7Y2P>x@`vgNO>()q@I6+-#gW z=`}vfd~(jKD&1AwID_~7fo0u#zpQFI_qka6tf2ZV9)Aao;zb>nkWoC6#Eq;4?UB%{JV`ceU7j;3&;9@MF#}r@^@MVG|1MFplCW9z}WRO=} zl9*Rg3@IoCks=;iVl*&mf^%xFK4&Yi;6Sz^W2=&F>S(L zL>Y5|eLF8b6zEX~5XNo0DgoP*DoaxHi&9dHNU%RAv!o;^7379i+sBe@K>I-$w;PNY zr0|A-9;jH>FHSAW1Qy*@i6y|$h|f%Ma}vW5AO>b;rWWRg=9ZR5Mn=Zw29`#~#s+2v z24?2Q#-^s0#%7jAmgYuAW*`@uiYPzV0J;u@1H2hQfd#KQkV{$>kOT@KrXGQqh$x|v z3nE<<(_R62_)NqouhET0PTb}wM#};{MUv5oat%F1kW;iKiX|$*)Q#T~q+Ek;>_w!c zs*Gan6avOVGY7hX$mvcM#Xuoe{O*LOcXT6>6N(Xvk<$nmi9PuSc(bwTK+Tb3MkEGy ps2GgS1t&=c1`ALg0s^2uu+&m$oewO}fMTp*S#}0?29SD)S^%&S$kzY> literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_mixed_file_reader.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_mixed_file_reader.pt new file mode 100644 index 0000000000000000000000000000000000000000..78b8c47c43c1168d40dac2c9bd03dc71b2c3a148 GIT binary patch literal 2311 zcmWIWW@cev;NW1u04fZ247rIpnaP>?rN!~NnH8xi@oAYksqsari7BZ?`YDMeiFyUu zIc`o?3{hl*2Bzfr_>%mhq7QebUO}as6C2dexqXh=hYfgI-~Sa|w47y6*>0(AkMr&( zya{UyNUAg}U8`hm@^=0F$-NsL^_^_?zJK|3Vo_V{j2pJ?C-QyzO4a+4gP%+6+^WrR zL|9XH=Y}+wik82}7nLpk{(s3Lksy!x2@8cgH*2PRN$$B3SZ^`eO2&on>%S^3N1>yC zbLK=Im)rB=O{S51{_AF?Tl^ifqgj1Z8PvGn&)Jm0UG8tcI#zFbRMtcDCo#s7Gj6%= z|Mx2HX~({0X0LUp*Ue@xDvFb?n0SOu?%jUIr$$-7CYKr(Y@a{RB)x4Vj^-s!<_o2k=~pE@Z_G7WpSCnFEK*a&?Kh8Z`m$5q&r0gOdT%^@ z^mB7_K<_1X+m&v+uWj8v??UW~V=F)O)S9l~J$%Suv8D4&Y3&^c@8lsMN!^tCW1HT+K4bEDGtYhE$17&IvMG?XYO&#)3b ziYsF-kk2p9OAiHl zngN7y+pbE$_N2;^)chh~*+_!@IhiFTIjJBwwAwzFWCPj{!noaF#2|$?1oS|~vVL)D zQ6{kHu1YKchDLm5ikp)djsP(*Gc&a?H#E1jG%_+WHaD;|GB!3aHn%V^ur#(bGdHj> zH!`p^Ffj$WaHgc8vU=05SCl#6&~^ja&%nqL}s?$irtM 
zMsba9G;%^WM=@Fv=qZwnMwDsjA%dKuHBl^)0H$vImLTOIbYqc|sxpePQwSIf%^c_k zBBwi56a$4=@w*eA-qDRjPAEnwMouGOB=+PR;LXOS12spE8Ic&+p<*yP7n~#+7%V_} d2nc}sz*0-0RX(t61B$VNW!V|n89?eGY5`^-$|3*& literal 0 HcmV?d00001 diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_rop_exploit.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_rop_exploit.pt new file mode 100644 index 0000000000000000000000000000000000000000..08beafc14cef417a5abf4b2585832b5d09dc427b GIT binary patch literal 6109 zcmb7|2T+sQ7RM<{Qv~T9fzUfaAXKFn=^&jzsDTt9^tOtj_f8;yNRuu|N0cHUNE4(Z z2uMdME=tu0>)U;~T2b%P3Bih@rCrX z2#J_vcnWTx?L41)XDX@ARQ^>aHxBhrRN9?M5dK4(kei>X;#C~;A4j&S7IIdC{OPd? zIY{FUB!a4>2ai|p{(6bZ-D?e@H(#_^W91SM;CyZ29myqolU@=HXDW>4p zd3?t?fiBbW^E0KdI_C%Ujd5j%Qb~M#(CSlZR)-G=#`X=8$n3w`%*p5Ia&M0fr#6r8 zQ%)uJ6qV?yf8O(~tThHB~i%Ig% zL=?TUlL0-c6|ori0;b#}`7(0)sG*PSs{M14n|jKWR>_s+xkhb^QnfE^u(m6=Qa&&5 z(nYYp^u_MV#&bOA4A&5ty1(Y{c1V8Jq!Nvv?YWs2tvg`uv z;#if!42PHX#`~fP8p@Ri2b@?cHtM~)^1)K@>zn2w?wDJOVpcN*b-f2R6tn_isxxBv zg$gfAO}sNY^}0X#;6CfgrJC49`P3WC=Xe$FEMr}wE`;~$SDph`$ZA!|KG}S1$D|3E8IXSq?x3fFS^yb6Z*4^RgQ$Z>TWqIK)9BW{?F-mu>*p+A}K9CJz$OI(@Mkx*ekxLwlq zsl?VXfqhg)=h!3~KJ*XIo7|p)4(s+cmmgWPyv$?Tq-BaE=MC2s}m(WMXDo!5Hk}YK8U(O)mGiuM~0epftCOf?1u!vDgc*M!cq$)(39+b-!v{ zZkt|Yt|Vyr$e&1E*4lj-TKa|nv3&d3#G`tVa`OP7J0CTO#eh_Mn&zEFB%-QCV`8T! z4TNok5K`f^9HSJkUXXTT*CJ6AsD5V2CjRj*JR;h7q}}tGMT3kS_0bkr-fTck+3>Y$ zK>^-}xqBTB*F#zp;Wt;!7pD}_BB?|%jWlV32xTH)$*zonKy8~>eJ2QJOy20OgS<8o zY-u^>LOo)-IgU2i;l1^2ayG-DRV>A*m%S1tvN>ZC_4qTAX|>)UAbc-&;=(4#yMc4H zbt_zlb)iVq94|y2l_O#w<$D9SiB=?TDdZJk0Bysun)z_-%c*)vf87nK&v=jU2|uI= z2ko0*yFaW?Q+m?+_+XL7dclK)cZB*T1qAaF0pzyymd)+3k=_$@$}=Q3pnA3;V66w? z=Ct}+u8VJUU|`5$a8)rDl`a=O>p)~fxYwYUMNLb4SjquekDg6bUxyC@hy220_=gr! 
z9y}|5tCCsW0sTmX^?#>cbTW~ruPycY5-V2tt;Uw*_Uo-ZBTFPiwL?KJW;9Z`uE{J8 zpb~DP-LNyYw0P7{X{VL}vt{Y1mW0vuX;$HBF%8i&-K)HDVAnJm?K&jqL%_(J#|@UpCnkJZ(qAuUr(tl=6nNW26$w zYyd&eS(tRR*9&;J3#QlP(vwa7?PoiU;uzi5?3mv(6HdErjM2R}JX~B@)r1r75hq1o ziLuYDb|Kj$25MxtjmByl(lzF*Ca)8j<&@U@AHln~UbNc=l+$H+IR&WA;Anr==-RUYa#slM_YREQD_ZLBcjVZ8$V_@r zVb(p3GL0Rpv1S!^Y)y8Y$eYa=KdzWk-TbqnsZe)+^V})O^o-A*{Mk;W&w5$Z99-#3mK*FYd0Hd z-?9;hLmpr3RxF${3mju0hQ0Q^N{|5;Uz+NMag953<(tV|eVM5bo0Up|MHG(45>k|? zVpp2ZsF>cEcui=UMAWs>P)VQhpiQK_7SN>1@Mq57b9rQCUQOoME|{9TPS zi<8^g)%|t-vD?Z^?>(I%Q_hUm4!4#*O+8QL>S~?lx%o&a8dGq^+hV`_%D!x_r?M1x z3u?pO4Eaf28XA^t7c|}yBaO649+kzakIZ=vvoOiT?Hl`t&y&>Tx5<@ad+7&Mc{&>D zd-ms@VY$8n(z~xWZM#5W$w3yup>CwCmY|+36CD-8l{z{am#qh?dHEf_M0oDpE_sCg^?kn8l%LNyV z=fij;XQP_#LUD@)k~FcQXvLf4{+KB+6rV=<3*A~#B9x>le%=KrfprBMa=ef+E0%`l z(~ZA#+qNo!o zljdZ*y7bi+S?Cd23?RvU)&m|ji1Z>n!Y^u7ql&&f}rz zdT>EOD-@g(>7mcYr;_@R<~k%ZL&z}#14+veT#0Ccu+(P?(?sCyS@7p1j1#z%ZH*QT z0}5)}Nf%w*jY#4zL_e3RscR3!nIJ!k_0=V*LNhY$#!<-=gAIG%mkx|7%iodqG~rn= z;q#Bq!y9@_@NmqAt&g2JQ!Ugw1nIovn$krPcv*;JpV#OJw;<+b0NbmCmuQaMZ@h|eoe(pz+hJ;kQ^Vc@AI>qT*s{0vVb%gyn8&7eMR z&C%}y7_c|Z?g3T2epML+^o}wZ`4?nxEr`~w8)D^(YiOm8OZu!Aw|oGr0UjE<>3Saw zGsh3;T=%M*B~jB$s5FE21F_9^vVOZTboQ)AI#OiF8zniBIQ!wQNSLHyhfmY8yUsR8 z(+vKx-eA%Wptm&+8%w?TYtQVb0{+kWSdInpUPf6d!NQDaR9?cqh4~9-`ra&AH^x3c z3L2`K-O6Q8wuUjQL+zB0qCrVR0dK|oE>a-a59>KxIcK}qnE_f)q-YCGc@~ZVFIz=u zm)VIG*g!pr&1gfc{16-rIP@W-dSEHApesB78FUKl{UyMyUAS6P zrOyVuAr=KKLFWS)pEOm2xfVo_&-@ul#)i#CZxie0nCFJ{zmWHJk9Bq^4$ z@{{W`rl`E;_d2m`SDn{!W>R0LYUxl@r)o7vmx|vZ2>n=1BaB&XpCVXwFWlOf8F~+` zYw1VfK<;$8cvMR-Eb!44I3s5{plO!yW(pfIGbha7%VW9={yF_oAOxEUm^iQz?BgQt zeBvHv*n$>Zx~fBl8p;PK?1Y5X&_^qI5d)XI3u<9IUjONGuoEe|)|nGNbBS{e}X>wCXFNRgpMhe8sEyCBud0@N>` zdsHiyQ{n1!-v*h4kI|v)uCBN|BFbNX&PdWkOr=Qv##6($I8d*!*pGX=v^s}v+=8ej zp`r_=>2Q%6X6;H%^3g~{rm9aKykM^{%%H3%9+lusG4Md8HcFyewKl$$lqr3j%RTiV zifR)uTPWRF1rgExVA6}P%yKnevzledI~A)3XEEsIO?`4dX(IAO>BKM!L5zI(P8;xc z%tmmq{39UneoeeomSM@GY0YjLV?Jhq6&PA;qmVNmwF!&9P5A5JfP`yU=UFAdx}P){An9n 
zd-eml`!2cpT7vF)T(VZ1?FKX~$`BO`yU&=YTbH$`>m!nPOjtgKV}oz)X)(-37`$a# zDdc|$F)D-9Tbs$-g{ROH+#27pm?*?lw`(I(}VkiG2q$43#)_M z1M7vepZJnCu_ZMY9kBq`(YK(205=t*Y8TnIb>bvl=#D_&rRps}uDdAIQ(wolgM;wP&{0nON)~)uo&o5k3a-9#J3U*mP88muKg@>(V zKNKdFS8CQ(47je9cW+b&(K4R;8cAmo#eYfPI?9JtM?1+O2fNiXiZLw9EOaxu&X~W1 zozvR0b$TI`yd%|1K=y_AMpOh^!R=_?MdImWm)8nbucca?!q=#*Anzv;hYxEdF7d`` zRD(N^t^|_NtdTv15t_bP&*?x``Ai0KqaN>cc(15O=onTBD6>$JXN%F`C%iTY-TiT?Y(;rIUG4j%7sZWmmb*yPNJJM-NBNeMHK=ueb*J9wa=NQA1Q z;QyxqNOB&BS2<3`cxNY{&vUQs@8iJw)6*e-o(@P4dk2sI`+OMG(-Y=!t|3%ziJsu> z{PX#94FWjVei8wpb6Zjv<=_E5GcEmXJcRuyBaL= 3.10)" +) + +if(NOT _python_path) + _validation_fail( + "No Python 3 interpreter found on PATH.\n" + "Install Python 3 or ensure it is on your PATH.") +endif() + +# Verify it is actually Python 3 (guards against "python" being Python 2). +execute_process( + COMMAND "${_python_path}" --version + OUTPUT_VARIABLE _py_version_out + ERROR_VARIABLE _py_version_out + RESULT_VARIABLE _py_rc + OUTPUT_STRIP_TRAILING_WHITESPACE +) +if(NOT _py_rc EQUAL 0 OR NOT _py_version_out MATCHES "Python 3\\.") + _validation_fail( + "Found ${_python_path} but it is not Python 3 (${_py_version_out}).") +endif() +message(STATUS "Found Python 3: ${_python_path} (${_py_version_out})") + +# --- Platform-specific venv paths --- +if(CMAKE_HOST_WIN32) + set(_venv_python "${_venv_dir}/Scripts/python.exe") + set(_venv_pip "${_venv_dir}/Scripts/pip.exe") +else() + set(_venv_python "${_venv_dir}/bin/python3") + set(_venv_pip "${_venv_dir}/bin/pip") +endif() + +# --- Create virtual environment if it does not exist --- +if(NOT EXISTS "${_venv_python}") + message(STATUS "Creating virtual environment in ${_venv_dir} ...") + execute_process( + COMMAND "${_python_path}" -m venv "${_venv_dir}" + RESULT_VARIABLE _venv_rc + ) + if(NOT _venv_rc EQUAL 0) + _validation_fail("Failed to create virtual environment (exit ${_venv_rc})") + endif() +endif() + +# --- Install / update dependencies when requirements.txt is newer --- +set(_stamp 
"${_venv_dir}/.requirements.stamp") +set(_needs_install FALSE) + +if(NOT EXISTS "${_stamp}") + set(_needs_install TRUE) +else() + file(TIMESTAMP "${_requirements}" _req_ts "%Y%m%d%H%M%S" UTC) + file(TIMESTAMP "${_stamp}" _stamp_ts "%Y%m%d%H%M%S" UTC) + if(_req_ts STRGREATER _stamp_ts) + set(_needs_install TRUE) + endif() +endif() + +if(_needs_install) + message(STATUS "Installing/updating Python dependencies ...") + execute_process( + COMMAND "${_venv_pip}" install --quiet --upgrade pip + RESULT_VARIABLE _pip_rc + ) + if(NOT _pip_rc EQUAL 0) + message(WARNING "pip upgrade failed (exit ${_pip_rc}) — continuing anyway") + endif() + + execute_process( + COMMAND "${_venv_pip}" install --quiet -r "${_requirements}" + RESULT_VARIABLE _pip_rc + ) + if(NOT _pip_rc EQUAL 0) + _validation_fail( + "Failed to install dependencies from ${_requirements} (exit ${_pip_rc}).\n" + "This may indicate no network access is available.") + endif() + + file(WRITE "${_stamp}" "installed") +endif() + +# --- Ensure the venv's torch libraries take precedence --- +# When a locally-built libtorch is installed in a system path (e.g. +# /usr/local/lib on macOS), the pip-installed torch package's +# libtorch_python will pick up the wrong libtorch_cpu at load time. +# Prepending the venv's torch/lib directory to the dynamic library +# search path forces the pip-bundled libraries to be found first. +if(CMAKE_HOST_WIN32) + set(_venv_site_packages "${_venv_dir}/Lib/site-packages") +else() + # Query the venv Python for its site-packages directory rather than + # globbing, which can yield a semicolon-separated list of paths. 
+ execute_process( + COMMAND "${_venv_python}" -c "import sysconfig; print(sysconfig.get_path('purelib'))" + OUTPUT_VARIABLE _venv_site_packages + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _sp_rc + ) + if(NOT _sp_rc EQUAL 0 OR _venv_site_packages STREQUAL "") + _validation_fail("Could not determine venv site-packages directory") + endif() +endif() +set(_torch_lib_dir "${_venv_site_packages}/torch/lib") + +if(EXISTS "${_torch_lib_dir}") + if(CMAKE_HOST_APPLE) + set(ENV{DYLD_LIBRARY_PATH} "${_torch_lib_dir}:$ENV{DYLD_LIBRARY_PATH}") + elseif(NOT CMAKE_HOST_WIN32) + set(ENV{LD_LIBRARY_PATH} "${_torch_lib_dir}:$ENV{LD_LIBRARY_PATH}") + endif() + message(STATUS "Prepended ${_torch_lib_dir} to dynamic library search path") +endif() + +# --- Build the command line for validate_allowlist.py --- +set(_cmd "${_venv_python}" "${_validate_script}") + +if(DEFINED VALIDATE_CONFIG) + list(APPEND _cmd "--config" "${VALIDATE_CONFIG}") +endif() + +if(DEFINED VALIDATE_PT_DIR) + list(APPEND _cmd "--pt-dir" "${VALIDATE_PT_DIR}") +endif() + +if(DEFINED VALIDATE_VERBOSE AND VALIDATE_VERBOSE) + list(APPEND _cmd "--verbose") +endif() + +message(STATUS "Running: ${_cmd}") + +execute_process( + COMMAND ${_cmd} + WORKING_DIRECTORY "${SOURCE_DIR}" + RESULT_VARIABLE _validate_rc +) + +if(NOT _validate_rc EQUAL 0) + _validation_fail("Validation failed (exit ${_validate_rc})") +endif() diff --git a/dev-tools/extract_model_ops/.gitignore b/dev-tools/extract_model_ops/.gitignore new file mode 100644 index 000000000..21d0b898f --- /dev/null +++ b/dev-tools/extract_model_ops/.gitignore @@ -0,0 +1 @@ +.venv/ diff --git a/dev-tools/extract_model_ops/README.md b/dev-tools/extract_model_ops/README.md new file mode 100644 index 000000000..f7b7f2f39 --- /dev/null +++ b/dev-tools/extract_model_ops/README.md @@ -0,0 +1,166 @@ +# extract_model_ops + +Developer tools for maintaining and validating the TorchScript operation +allowlist in `bin/pytorch_inference/CSupportedOperations.cc`. 
+ +This directory contains two scripts that share the same Python environment: + +| Script | Purpose | +|---|---| +| `extract_model_ops.py` | Generate the C++ `ALLOWED_OPERATIONS` set from reference models | +| `validate_allowlist.py` | Verify the allowlist accepts all supported models (no false positives) | + +## Setup + +Create a Python virtual environment and install the dependencies: + +```bash +cd dev-tools/extract_model_ops +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +If any of the reference models are gated, set a HuggingFace token: + +```bash +export HF_TOKEN="hf_..." +``` + +## extract_model_ops.py + +Traces each model in `reference_models.json`, collects the TorchScript +operations from the inlined forward graph, and outputs the union as a +sorted list or a ready-to-paste C++ initializer. + +### When to run + +- A new transformer architecture is added to the supported set. +- The PyTorch (libtorch) version used by ml-cpp is upgraded. +- You need to inspect which operations a particular model uses. + +### Usage + +```bash +# Print the sorted union of all operations (default) +python3 extract_model_ops.py + +# Print a ready-to-paste C++ initializer list +python3 extract_model_ops.py --cpp + +# Also show per-model breakdowns +python3 extract_model_ops.py --per-model --cpp + +# Generate the golden file for the C++ allowlist drift test +python3 extract_model_ops.py --golden \ + ../../bin/pytorch_inference/unittest/testfiles/reference_model_ops.json + +# Use a custom config file +python3 extract_model_ops.py --config /path/to/models.json +``` + +## validate\_allowlist.py + +Parses `ALLOWED_OPERATIONS` and `FORBIDDEN_OPERATIONS` directly from +`CSupportedOperations.cc`, then traces every model in a config file and +checks that each model's operations are accepted. Exits non-zero if +any model would be rejected (a false positive). 
+ +### When to run + +- After regenerating `ALLOWED_OPERATIONS` with `extract_model_ops.py`. +- After adding new models to `validation_models.json`. +- As a pre-merge check for any PR that touches the allowlist or the + graph validation logic. + +### Usage + +```bash +# Validate against the default set (validation_models.json) +python3 validate_allowlist.py + +# Validate with verbose per-model op counts +python3 validate_allowlist.py --verbose + +# Validate against a custom model set +python3 validate_allowlist.py --config /path/to/models.json +``` + +The script can also be run via the CMake `validate_pytorch_inference_models` +target, which automatically locates a Python 3 interpreter, creates a venv, +and installs dependencies — no manual setup required: + +```bash +cmake --build cmake-build-relwithdebinfo -t validate_pytorch_inference_models +``` + +The CMake target searches for `python3`, `python3.12`, `python3.11`, +`python3.10`, `python3.9`, and `python` (in that order), accepting the +first one that reports Python 3.x. This handles Linux build machines +where Python is only available as `python3.12` (via `make altinstall`) +as well as Windows where the canonical name is `python`. + +## Configuration files + +| File | Used by | Purpose | +|---|---|---| +| `reference_models.json` | `extract_model_ops.py` | Models whose ops form the allowlist | +| `validation_models.json` | `validate_allowlist.py` | Superset including task-specific models (NER, sentiment) from `bin/pytorch_inference/examples/` | + +Each file maps a short architecture name to a HuggingFace model identifier: + +```json +{ + "bert": "bert-base-uncased", + "roberta": "roberta-base" +} +``` + +To add a new architecture, append an entry to `reference_models.json`, +re-run `extract_model_ops.py --cpp`, and update `CSupportedOperations.cc`. +Then add the same entry (plus any task-specific variants) to +`validation_models.json` and run `validate_allowlist.py` to confirm +there are no false positives. 
Finally, regenerate the golden file +(see below). + +## Golden file for allowlist drift detection + +The C++ test `testAllowlistCoversReferenceModels` loads a golden JSON +file containing per-architecture op sets and verifies every op is in +`ALLOWED_OPERATIONS` and none are in `FORBIDDEN_OPERATIONS`. This +catches allowlist regressions in CI without requiring Python or network +access. + +The golden file lives at: +`bin/pytorch_inference/unittest/testfiles/reference_model_ops.json` + +### When to regenerate + +- After upgrading the PyTorch (libtorch) version. +- After adding or removing a supported architecture. +- After modifying `ALLOWED_OPERATIONS` or `FORBIDDEN_OPERATIONS`. + +### How to regenerate + +```bash +cd dev-tools/extract_model_ops +source .venv/bin/activate +python3 extract_model_ops.py --golden \ + ../../bin/pytorch_inference/unittest/testfiles/reference_model_ops.json +``` + +If the regenerated file introduces ops not in the allowlist, the C++ +test will fail until `CSupportedOperations.cc` is updated. + +## How it works + +1. Each reference model is loaded via `transformers.AutoModel` with + `torchscript=True` in the config. +2. The model is traced with `torch.jit.trace` using a short dummy input + (falls back to `torch.jit.script` if tracing fails). +3. All method calls in the forward graph are inlined via + `torch._C._jit_pass_inline` so that operations inside submodules + are visible. +4. Every node's operation name (`node.kind()`) is collected, recursing + into sub-blocks (e.g. inside `prim::If` / `prim::Loop` nodes). +5. The union across all models is reported. 
diff --git a/dev-tools/extract_model_ops/es_it_models/README.md b/dev-tools/extract_model_ops/es_it_models/README.md new file mode 100644 index 000000000..a3997d2ef --- /dev/null +++ b/dev-tools/extract_model_ops/es_it_models/README.md @@ -0,0 +1,41 @@ +# Elasticsearch Integration Test Models + +Pre-saved TorchScript `.pt` files extracted from the base64-encoded models +in the Elasticsearch Java integration tests. These are tiny synthetic models +(not real transformer architectures) used to test the `pytorch_inference` +loading and evaluation pipeline. + +| File | Source | Description | +|------|--------|-------------| +| `supersimple_pytorch_model_it.pt` | `PyTorchModelIT.java` | Returns `torch.ones` of shape `(batch, 2)` | +| `tiny_text_expansion.pt` | `TextExpansionQueryIT.java` | Sparse weight vector sized by max input ID | +| `tiny_text_embedding.pt` | `TextEmbeddingQueryIT.java` | Random 100-dim embedding seeded by input hash | + +## Regenerating + +If the Java test models change, re-extract them by running the generation +snippet from this repository's root: + +```bash +python3 -c " +import re, base64, os + +JAVA_DIR = '/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration' +OUTPUT_DIR = 'dev-tools/extract_model_ops/es_it_models' + +SOURCES = { + 'supersimple_pytorch_model_it.pt': ('PyTorchModelIT.java', 'BASE_64_ENCODED_MODEL'), + 'tiny_text_expansion.pt': ('TextExpansionQueryIT.java', 'BASE_64_ENCODED_MODEL'), + 'tiny_text_embedding.pt': ('TextEmbeddingQueryIT.java', 'BASE_64_ENCODED_MODEL'), +} +os.makedirs(OUTPUT_DIR, exist_ok=True) +for out_name, (java_file, var_name) in SOURCES.items(): + with open(os.path.join(JAVA_DIR, java_file)) as f: + src = f.read() + m = re.search(rf'{var_name}\s*=\s*(\".*?\");', src, re.DOTALL) + b64 = re.sub(r'\"\s*\+\s*\"', '', m.group(1)).strip('\"').replace('\n', '').replace(' ', '') + with open(os.path.join(OUTPUT_DIR, out_name), 'wb') as f: + 
f.write(base64.b64decode(b64)) + print(f'Wrote {out_name}') +" +``` diff --git a/dev-tools/extract_model_ops/es_it_models/supersimple_pytorch_model_it.pt b/dev-tools/extract_model_ops/es_it_models/supersimple_pytorch_model_it.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eecbb1b3f93065391bf1b61feca58dfadd61aa6 GIT binary patch literal 1630 zcmWIWW@cev;NW1u03r;048@tb1v#m?`6;P6`YDMeiFyUuIc`o|3{h~vJ8$WZb$~YL?3RSUO}aslP!{C;I1N`QQBvieb_*t_4}VL<;gtj3NJey ze9(7ymg-F>fv#FF)=7SImWX}79v+dK*`9N${Qcv-Cs#>4oWT6);EsgAJ(uR_njL0y zIJF?c(&t2=m&C)8xRbL@reKE=8lZ70b5*b_Ilp@o0;t` za^S~=$tJ}z4?66G!q@zmCHqUI-2BI3-hV$1OYf-uSI3v_@-5Y<^#-fiy4&gHORB_P z@6em&FS5Tl>c#A3Ro{=U7W^b}^yCkfl>1vYSZ_C2uKM7|m8MYcrzY>bmj%?A3-?`J zCBJd5(>*b534K|sN2xV|W=Xk;J~L}IZ`Uw_Lf@A~qV^Lo=sw?a|B5ExdnFQomxts*D?Fj26ok-kbKIv!p@ndf>kjy*oYZ zo%1u|&ogB{T=(tLm&#xJ_83bn_tg9C+Ouv_jn7K)wL-ss&)D9f+@F49{qO#@ z+86iNRr$V(SHCe$Rk*}*A-AKN@0}2T)9w6MzFeAjS^Kgq`NKldaq=JFH%(T zl6O{`{=nt$oypho-f{?i2zl!h-*D>gws(G^?@pikea+5ck(AY|=MIaG_s!cLdR}%< zKxjC7W7yD6B##|upT%4C4 z$^^6-gaf=8K@_}9LM|ypKoTebTL}d*0a2nLr*>Hs-7P>KvI!XF3%XIr;jWHi6cfCTEu?7Nx{zGWnGz6*B7uc(ZeC2)VE!0;m#%1Ay)n0lE{@p|T9JDDF%K zx>g_VCcT16Hz!FX2g6+sVYK!eW*;^XX#M`DOIp{%d6U$owaK?tXB)QX%*{%SOfM12 z{a34do5f2r_I_>u_t^JaIqt;lxc@?XuaB(c<3sLWPpMT0ePv`QycXy`MT1KzW?|;_ zEvK*j>j=7{r<1y3Ve&WDq9(KI{<-gt`^)n3oq1eTKY#M$1+~EkE}5nrEYk5&eUiJE zN2Tv?L&dMu@)#}#y)%{zje`#NHWl1?bVQle=ljfUx0AD)Z@rdgzja~e(W#I3Kd@5u zaDAX3bnx-TyK#DQ5ROrL5AG+`=<;NrbIjfY;hy56$Bn#251|%hGe-G3WeR zs~Todh&KOU)AuIHP_x1{d8}YUcbwGFjDtis~1n4la>5(r)h6D z)^GWD!LjI~%JjB9huNLPPlis8k^aOP&!O#+H|xo^n{U@S zmR#hm1Lg;IjvXxB1{T1W{0ogu?D<#{BR2DjOA_-+iXoYq!uV@o(gf$#Tz$@PU_rtF z!l1m0-7Ufl)+i~fEVZaOGe6JG$(-C&WXuH$iHq~nLz#d!gK&U1BZz{RX~?CB2uK12 zU@PGuCLl^1!T^zWZzz;pEdZ_OH(&P9pk)sD07RlS~n`i{URqa!>Q zTUb<=S>N`LcaE^SyXsXV^SAf5_SSPl4G*2%ab5WCkI%V5dQ$H@ZGFTS|3O?c-EXRXcqCpp*6)%f}?kTa|Q z{+`7xes;5#{){h>6`gp@`0wO14-YTb_1Mcg@yF8@ch=~*Da~DaYQduku6#H5Im~;$ znu)*2>*<{Iqbj+2X2MJFA6dNX?n|x}W-qyCo<0!xrSrGv!Q*RIavA0dD?MjjlOoP> 
zDmKG-%aeD_OI~dH_)cZLqwN1QE{l%Lsrd?a`_#8`upqO|1*~9^RmBHx(2>mv;AVw@tZL(?-a|G z-O|o3pL_1or&qa)W^6ma75Fx@@P*tv$-OGKi;d4Ov3PTqX_EK_-jA$>RtbI&F1jvm zPM(#*6rQvCfX>UuHxk)3&)L4#ClT{8sScNP0-j^l+OakxplS-h4RA${bm8!+eMwCC^G z;{GcfSsPs;-tH;&aK_eaVe(rtBwW_cyL~)-@sYy|r^-zT?wMn@aMR@TpG*>_O=h$H zE71JbQJ~6CV6(vjo(aZj*N?2>6RdnHt3x|BW#b=h;Qkm)P@ z;-z7-bkS+2zTz*xBI1NLow~T&s8&DK*R@np-QrikGI{UT{j2{K?2>=_;C$e>)gr%j z=Pv6qYnk&T!%_ItzAl%?Nkt{G+83pR^j2qtdK|d8c+<}4Wqvd5^e*S>-rryJ$;|kX zz?x$h7jD`1Ir_DX?d3m;)r))kieiGLq-9LCoR6Kb%=^m2`(#h^*O_VhA@CIZu5j0tWnG>ODzHw({4`Y z#N`&`oC9GSbAfzwab9{T&@&7m9N^6eqTqE3aw#qXl0X62Y8Z$Kh|(UpbdW{Sy#mNX zHUXpTM>h&N*ws;tngnz-UZW6&3%U=H!(I}_&=yv7Ly_VS-5BIR6h<-T3Ysy{XbbRW zW7B~ulw;O~YXPbT0~nnJELlMyhz&vm^|6Ckd%z?E14uf+n-wg}3QTbz^$@iHM7!YN literal 0 HcmV?d00001 diff --git a/dev-tools/extract_model_ops/extract_model_ops.py b/dev-tools/extract_model_ops/extract_model_ops.py new file mode 100644 index 000000000..451369a6d --- /dev/null +++ b/dev-tools/extract_model_ops/extract_model_ops.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# +"""Extract TorchScript operation sets from supported HuggingFace transformer architectures. + +This developer tool traces/scripts reference models and collects the set of +TorchScript operations that appear in their forward() computation graphs. +The output is a sorted, de-duplicated union of all operations which can be +used to build the C++ allowlist in CSupportedOperations.h. 
+ +Usage: + python3 extract_model_ops.py [--per-model] [--cpp] [--golden OUTPUT] [--config CONFIG] + +Flags: + --per-model Print the op set for each model individually. + --cpp Print the union as a C++ initializer list. + --golden OUTPUT Write per-model op sets as a JSON golden file for the + C++ allowlist drift test. + --config CONFIG Path to the reference models JSON config file. + Defaults to reference_models.json in the same directory. +""" + +import argparse +import json +import sys +from pathlib import Path +from typing import Optional + +import torch + +from torchscript_utils import ( + collect_inlined_ops, + load_and_trace_hf_model, + load_model_config, +) + +SCRIPT_DIR = Path(__file__).resolve().parent +DEFAULT_CONFIG = SCRIPT_DIR / "reference_models.json" + + +def extract_ops_for_model(model_name: str, + quantize: bool = False) -> Optional[set[str]]: + """Trace a HuggingFace model and return its TorchScript op set. + + Returns None if the model could not be loaded or traced. + """ + label = f"{model_name} (quantized)" if quantize else model_name + print(f" Loading {label}...", file=sys.stderr) + traced = load_and_trace_hf_model(model_name, quantize=quantize) + if traced is None: + return None + return collect_inlined_ops(traced) + + +def format_cpp_initializer(ops: set[str]) -> str: + """Format the op set as a C++ initializer list for std::unordered_set.""" + sorted_ops = sorted(ops) + lines = [] + for op in sorted_ops: + lines.append(f' "{op}"sv,') + return "{\n" + "\n".join(lines) + "\n}" + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--per-model", action="store_true", + help="Print per-model op sets") + parser.add_argument("--cpp", action="store_true", + help="Print union as C++ initializer") + parser.add_argument("--golden", type=Path, default=None, metavar="OUTPUT", + help="Write per-model op sets as a JSON golden file") + 
parser.add_argument("--config", type=Path, default=DEFAULT_CONFIG, + help="Path to reference_models.json config file") + args = parser.parse_args() + + reference_models = load_model_config(args.config) + + per_model_ops = {} + union_ops = set() + + print("Extracting TorchScript ops from supported architectures...", + file=sys.stderr) + + failed = [] + for arch, spec in reference_models.items(): + ops = extract_ops_for_model(spec["model_id"], + quantize=spec["quantized"]) + if ops is None: + failed.append(arch) + print(f" {arch}: FAILED", file=sys.stderr) + continue + per_model_ops[arch] = ops + union_ops.update(ops) + print(f" {arch}: {len(ops)} ops", file=sys.stderr) + + print(f"\nTotal union: {len(union_ops)} unique ops", file=sys.stderr) + if failed: + print(f"Failed models: {', '.join(failed)}", file=sys.stderr) + + if args.golden: + golden = { + "pytorch_version": torch.__version__, + "models": { + arch: { + "model_id": reference_models[arch]["model_id"], + "quantized": reference_models[arch]["quantized"], + "ops": sorted(ops), + } + for arch, ops in sorted(per_model_ops.items()) + }, + } + args.golden.parent.mkdir(parents=True, exist_ok=True) + with open(args.golden, "w") as f: + json.dump(golden, f, indent=2) + f.write("\n") + print(f"Wrote golden file to {args.golden} " + f"({len(per_model_ops)} models, " + f"{len(union_ops)} unique ops)", file=sys.stderr) + + if args.per_model: + for arch, ops in sorted(per_model_ops.items()): + spec = reference_models[arch] + label = spec["model_id"] + if spec["quantized"]: + label += " (quantized)" + print(f"\n=== {arch} ({label}) ===") + for op in sorted(ops): + print(f" {op}") + + if args.cpp: + print("\n// C++ initializer for SUPPORTED_OPERATIONS:") + print(format_cpp_initializer(union_ops)) + elif not args.golden: + print("\n// Sorted union of all operations:") + for op in sorted(union_ops): + print(op) + + +if __name__ == "__main__": + main() diff --git a/dev-tools/extract_model_ops/reference_models.json 
b/dev-tools/extract_model_ops/reference_models.json new file mode 100644 index 000000000..52556c2af --- /dev/null +++ b/dev-tools/extract_model_ops/reference_models.json @@ -0,0 +1,31 @@ +{ + "bert": "bert-base-uncased", + "roberta": "roberta-base", + "distilbert": "distilbert-base-uncased", + "electra": "google/electra-small-discriminator", + "mpnet": "microsoft/mpnet-base", + "deberta": "microsoft/deberta-base", + "dpr": "facebook/dpr-ctx_encoder-single-nq-base", + "mobilebert": "google/mobilebert-uncased", + "xlm-roberta": "xlm-roberta-base", + "elastic-bge-m3": "elastic/bge-m3", + "elastic-distilbert-cased-ner": "elastic/distilbert-base-cased-finetuned-conll03-english", + "elastic-distilbert-uncased-ner": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "elastic-eis-elser-v2": "elastic/eis-elser-v2", + "elastic-elser-v2": "elastic/elser-v2", + "elastic-hugging-face-elser": "elastic/hugging-face-elser", + "elastic-multilingual-e5-small-optimized": "elastic/multilingual-e5-small-optimized", + "elastic-splade-v3": "elastic/splade-v3", + "elastic-test-elser-v2": "elastic/test-elser-v2", + "distilbert-sst2": "distilbert-base-uncased-finetuned-sst-2-english", + "all-distilroberta-v1": "sentence-transformers/all-distilroberta-v1", + + "_comment:prepacked": "Prepacked models: .rerank-v1 is an internal Elastic model hosted at ml-models.elastic.co, not on HuggingFace. Its ops are extracted from the TorchScript .pt file directly and added to the golden file manually.", + + "_comment:eland": "Eland-deployed variants: Eland wraps models with additional layers (pooling, normalization) before tracing. The -eland entries in the golden file capture the full Eland-traced op set. 
These are extracted separately using eland[pytorch] and added to the golden file manually since extract_model_ops.py traces base HuggingFace models only.", + + "_comment:quantized": "Quantized variants: Eland applies torch.quantization.quantize_dynamic on nn.Linear layers when importing models. These produce quantized::* ops not present in the standard traced graphs above.", + "elastic-elser-v2-quantized": {"model_id": "elastic/elser-v2", "quantized": true}, + "elastic-eis-elser-v2-quantized": {"model_id": "elastic/eis-elser-v2", "quantized": true}, + "elastic-test-elser-v2-quantized": {"model_id": "elastic/test-elser-v2", "quantized": true} +} diff --git a/dev-tools/extract_model_ops/requirements.txt b/dev-tools/extract_model_ops/requirements.txt new file mode 100644 index 000000000..70d0ebb78 --- /dev/null +++ b/dev-tools/extract_model_ops/requirements.txt @@ -0,0 +1,4 @@ +torch==2.7.1 +transformers>=4.40.0 +sentencepiece>=0.2.0 +protobuf>=5.0.0 diff --git a/dev-tools/extract_model_ops/torchscript_utils.py b/dev-tools/extract_model_ops/torchscript_utils.py new file mode 100644 index 000000000..33042f261 --- /dev/null +++ b/dev-tools/extract_model_ops/torchscript_utils.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. 
+# +"""Shared utilities for extracting and inspecting TorchScript operations.""" + +import json +import os +import sys +from pathlib import Path + +import torch +from transformers import AutoConfig, AutoModel, AutoTokenizer + + +def load_model_config(config_path: Path) -> dict[str, dict]: + """Load a model config JSON file and normalise entries. + + Each entry is either a plain model-name string or a dict with + ``model_id`` (required) and optional ``quantized`` boolean. All + entries are normalised to ``{"model_id": str, "quantized": bool}``. + Keys starting with ``_comment`` are silently skipped. + + Raises ``ValueError`` for malformed entries so that config problems + are caught early with an actionable message. + """ + with open(config_path) as f: + raw = json.load(f) + + models: dict[str, dict] = {} + for key, value in raw.items(): + if key.startswith("_comment"): + continue + if isinstance(value, str): + models[key] = {"model_id": value, "quantized": False} + elif isinstance(value, dict): + if "model_id" not in value: + raise ValueError( + f"Config entry {key!r} is a dict but missing required " + f"'model_id' key: {value!r}") + models[key] = { + "model_id": value["model_id"], + "quantized": value.get("quantized", False), + } + else: + raise ValueError( + f"Config entry {key!r} has unsupported type " + f"{type(value).__name__}: {value!r}. 
" + f"Expected a model name string or a dict with 'model_id'.") + return models + + +def collect_graph_ops(graph) -> set[str]: + """Collect all operation names from a TorchScript graph, including blocks.""" + ops = set() + for node in graph.nodes(): + ops.add(node.kind()) + for block in node.blocks(): + ops.update(collect_graph_ops(block)) + return ops + + +def collect_inlined_ops(module) -> set[str]: + """Clone the forward graph, inline all calls, and return the op set.""" + graph = module.forward.graph.copy() + torch._C._jit_pass_inline(graph) + return collect_graph_ops(graph) + + +def load_and_trace_hf_model(model_name: str, quantize: bool = False): + """Load a HuggingFace model, tokenize sample input, and trace to TorchScript. + + When *quantize* is True the model is dynamically quantized (nn.Linear + layers converted to quantized::linear_dynamic) before tracing. This + mirrors what Eland does when importing models for Elasticsearch. + + Returns the traced module, or None if the model could not be loaded or traced. 
+ """ + token = os.environ.get("HF_TOKEN") + + try: + tokenizer = AutoTokenizer.from_pretrained(model_name, token=token) + config = AutoConfig.from_pretrained( + model_name, torchscript=True, token=token) + model = AutoModel.from_pretrained( + model_name, config=config, token=token) + model.eval() + except Exception as exc: + print(f" LOAD ERROR: {exc}", file=sys.stderr) + return None + + if quantize: + try: + model = torch.quantization.quantize_dynamic( + model, {torch.nn.Linear}, dtype=torch.qint8) + print(" Applied dynamic quantization (nn.Linear -> qint8)", + file=sys.stderr) + except Exception as exc: + print(f" QUANTIZE ERROR: {exc}", file=sys.stderr) + return None + + inputs = tokenizer( + "This is a sample input for graph extraction.", + return_tensors="pt", padding="max_length", + max_length=32, truncation=True) + + input_ids = inputs["input_ids"] + attention_mask = inputs["attention_mask"] + + try: + return torch.jit.trace( + model, (input_ids, attention_mask), strict=False) + except Exception as exc: + print(f" TRACE WARNING: {exc}", file=sys.stderr) + print(" Falling back to torch.jit.script...", file=sys.stderr) + try: + return torch.jit.script(model) + except Exception as exc2: + print(f" SCRIPT ERROR: {exc2}", file=sys.stderr) + return None diff --git a/dev-tools/extract_model_ops/validate_allowlist.py b/dev-tools/extract_model_ops/validate_allowlist.py new file mode 100644 index 000000000..dfb39021a --- /dev/null +++ b/dev-tools/extract_model_ops/validate_allowlist.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. 
You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# +"""Validate that the C++ operation allowlist accepts all supported model architectures. + +Traces each model listed in a JSON config file, extracts its TorchScript +operations (using the same inlining approach as the C++ validator), and +checks every operation against the ALLOWED_OPERATIONS and FORBIDDEN_OPERATIONS +sets parsed from CSupportedOperations.cc. + +This is the Python-side equivalent of the C++ CModelGraphValidator and is +intended as an integration test: if any legitimate model produces an +operation that the C++ code would reject, this script exits non-zero. + +Exit codes: + 0 All models pass (no false positives). + 1 At least one model was rejected or a model failed to load/trace. + +Usage: + python3 validate_allowlist.py [--config CONFIG] [--verbose] +""" + +import argparse +import re +import sys +from pathlib import Path +from typing import Optional + +import torch + +from torchscript_utils import ( + collect_inlined_ops, + load_and_trace_hf_model, + load_model_config, +) + +SCRIPT_DIR = Path(__file__).resolve().parent +REPO_ROOT = SCRIPT_DIR.parents[1] +DEFAULT_CONFIG = SCRIPT_DIR / "validation_models.json" +SUPPORTED_OPS_CC = REPO_ROOT / "bin" / "pytorch_inference" / "CSupportedOperations.cc" + + +def parse_string_set_from_cc(path: Path, variable_name: str) -> set[str]: + """Extract a set of string literals from a C++ TStringViewSet definition.""" + text = path.read_text() + pattern = rf'{re.escape(variable_name)}\s*=\s*\{{(.*?)\}};' + match = re.search(pattern, text, re.DOTALL) + if not match: + raise RuntimeError(f"Could not find {variable_name} in {path}") + block = match.group(1) + return set(re.findall(r'"([^"]+)"', block)) + + +def load_cpp_sets() -> tuple[set[str], set[str]]: + """Parse ALLOWED_OPERATIONS and FORBIDDEN_OPERATIONS from the C++ source.""" + allowed = parse_string_set_from_cc(SUPPORTED_OPS_CC, 
"ALLOWED_OPERATIONS") + forbidden = parse_string_set_from_cc(SUPPORTED_OPS_CC, "FORBIDDEN_OPERATIONS") + return allowed, forbidden + + +def load_pt_and_collect_ops(pt_path: str) -> Optional[set[str]]: + """Load a saved TorchScript .pt file, inline, and return its op set.""" + try: + module = torch.jit.load(pt_path) + return collect_inlined_ops(module) + except Exception as exc: + print(f" LOAD ERROR: {exc}", file=sys.stderr) + return None + + +def check_ops(ops: set[str], + allowed: set[str], + forbidden: set[str], + verbose: bool) -> bool: + """Check an op set against allowed/forbidden lists. Returns True if all pass.""" + forbidden_found = sorted(ops & forbidden) + unrecognised = sorted(ops - allowed - forbidden) + + if verbose: + print(f" {len(ops)} distinct ops", file=sys.stderr) + + if forbidden_found: + print(f" FORBIDDEN: {forbidden_found}", file=sys.stderr) + if unrecognised: + print(f" UNRECOGNISED: {unrecognised}", file=sys.stderr) + + if not forbidden_found and not unrecognised: + print(f" PASS", file=sys.stderr) + return True + + print(f" FAIL", file=sys.stderr) + return False + + +def validate_model(model_name: str, + allowed: set[str], + forbidden: set[str], + verbose: bool, + quantize: bool = False) -> bool: + """Validate one HuggingFace model. Returns True if all ops pass.""" + label = f"{model_name} (quantized)" if quantize else model_name + print(f" {label}...", file=sys.stderr) + traced = load_and_trace_hf_model(model_name, quantize=quantize) + if traced is None: + print(f" FAILED (could not load/trace)", file=sys.stderr) + return False + ops = collect_inlined_ops(traced) + return check_ops(ops, allowed, forbidden, verbose) + + +def validate_pt_file(name: str, + pt_path: str, + allowed: set[str], + forbidden: set[str], + verbose: bool) -> bool: + """Validate a local TorchScript .pt file. 
Returns True if all ops pass.""" + print(f" {name} ({pt_path})...", file=sys.stderr) + ops = load_pt_and_collect_ops(pt_path) + if ops is None: + print(f" FAILED (could not load)", file=sys.stderr) + return False + return check_ops(ops, allowed, forbidden, verbose) + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + "--config", type=Path, default=DEFAULT_CONFIG, + help="Path to validation_models.json (default: %(default)s)") + parser.add_argument( + "--pt-dir", type=Path, default=None, + help="Directory of pre-saved .pt TorchScript files to validate") + parser.add_argument( + "--verbose", action="store_true", + help="Print per-model op counts") + args = parser.parse_args() + + print(f"PyTorch version: {torch.__version__}", file=sys.stderr) + + allowed, forbidden = load_cpp_sets() + print(f"Parsed {len(allowed)} allowed ops and {len(forbidden)} " + f"forbidden ops from {SUPPORTED_OPS_CC.name}", file=sys.stderr) + + results: dict[str, bool] = {} + + models = load_model_config(args.config) + + print(f"Validating {len(models)} HuggingFace models from " + f"{args.config.name}...", file=sys.stderr) + + for arch, spec in models.items(): + results[arch] = validate_model( + spec["model_id"], allowed, forbidden, args.verbose, + quantize=spec["quantized"]) + + if args.pt_dir and args.pt_dir.is_dir(): + pt_files = sorted(args.pt_dir.glob("*.pt")) + if pt_files: + print(f"Validating {len(pt_files)} local .pt files from " + f"{args.pt_dir}...", file=sys.stderr) + for pt_path in pt_files: + name = pt_path.stem + results[f"pt:{name}"] = validate_pt_file( + name, str(pt_path), allowed, forbidden, args.verbose) + + print(file=sys.stderr) + print("=" * 60, file=sys.stderr) + all_pass = all(results.values()) + for key, passed in results.items(): + status = "PASS" if passed else "FAIL" + if key.startswith("pt:"): + print(f" {key}: {status}", file=sys.stderr) + else: + spec = 
models[key] + label = spec["model_id"] + if spec["quantized"]: + label += " (quantized)" + print(f" {key} ({label}): {status}", file=sys.stderr) + + print("=" * 60, file=sys.stderr) + if all_pass: + print("All models PASS - no false positives.", file=sys.stderr) + else: + failed = [a for a, p in results.items() if not p] + print(f"FAILED models: {', '.join(failed)}", file=sys.stderr) + + sys.exit(0 if all_pass else 1) + + +if __name__ == "__main__": + main() diff --git a/dev-tools/extract_model_ops/validation_models.json b/dev-tools/extract_model_ops/validation_models.json new file mode 100644 index 000000000..0c853cdc5 --- /dev/null +++ b/dev-tools/extract_model_ops/validation_models.json @@ -0,0 +1,33 @@ +{ + "bert": "bert-base-uncased", + "roberta": "roberta-base", + "distilbert": "distilbert-base-uncased", + "electra": "google/electra-small-discriminator", + "mpnet": "microsoft/mpnet-base", + "deberta": "microsoft/deberta-base", + "dpr": "facebook/dpr-ctx_encoder-single-nq-base", + "mobilebert": "google/mobilebert-uncased", + "xlm-roberta": "xlm-roberta-base", + + "elastic-bge-m3": "elastic/bge-m3", + "elastic-distilbert-cased-ner": "elastic/distilbert-base-cased-finetuned-conll03-english", + "elastic-distilbert-uncased-ner": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "elastic-eis-elser-v2": "elastic/eis-elser-v2", + "elastic-elser-v2": "elastic/elser-v2", + "elastic-hugging-face-elser": "elastic/hugging-face-elser", + "elastic-multilingual-e5-small-optimized": "elastic/multilingual-e5-small-optimized", + "elastic-splade-v3": "elastic/splade-v3", + "elastic-test-elser-v2": "elastic/test-elser-v2", + + "elastic-elser-v2-quantized": {"model_id": "elastic/elser-v2", "quantized": true}, + "elastic-eis-elser-v2-quantized": {"model_id": "elastic/eis-elser-v2", "quantized": true}, + "elastic-test-elser-v2-quantized": {"model_id": "elastic/test-elser-v2", "quantized": true}, + + "ner-dslim-bert-base": "dslim/bert-base-NER", + 
"sentiment-distilbert-sst2": "distilbert-base-uncased-finetuned-sst-2-english", + + "es-multilingual-e5-small": "intfloat/multilingual-e5-small", + "es-all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + "es-cross-encoder-ms-marco": "cross-encoder/ms-marco-MiniLM-L-6-v2", + "es-dpr-question-encoder": "facebook/dpr-question_encoder-single-nq-base" +} diff --git a/dev-tools/generate_malicious_models.py b/dev-tools/generate_malicious_models.py new file mode 100644 index 000000000..21afe1110 --- /dev/null +++ b/dev-tools/generate_malicious_models.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# +"""Generate malicious TorchScript model fixtures for validator integration tests. + +Each model is designed to exercise a specific attack vector that the +CModelGraphValidator must detect and reject. + +Usage: + python3 generate_malicious_models.py [output_dir] + +The output directory defaults to the same directory as this script. 
+""" + +import os +import sys +from pathlib import Path + +import torch +from torch import Tensor +from typing import Optional + + +# --- Malicious model definitions --- + + +class FileReaderModel(torch.nn.Module): + """Uses aten::from_file to read arbitrary files from disk.""" + def forward(self, x: Tensor) -> Tensor: + stolen = torch.from_file("/etc/passwd", size=100) + return stolen + + +class MixedFileReaderModel(torch.nn.Module): + """Mixes allowed ops with a forbidden aten::from_file call.""" + def forward(self, x: Tensor) -> Tensor: + y = x + x + z = torch.from_file("/etc/shadow", size=10) + return y + z + + +class HiddenInSubmodule(torch.nn.Module): + """Hides aten::sin (unrecognised) three levels deep in submodules.""" + def __init__(self): + super().__init__() + self.inner = _Inner() + + def forward(self, x: Tensor) -> Tensor: + y = x * x + return self.inner(y) + + +class _Inner(torch.nn.Module): + def __init__(self): + super().__init__() + self.leaf = _Leaf() + + def forward(self, x: Tensor) -> Tensor: + return self.leaf(x) + x + + +class _Leaf(torch.nn.Module): + def forward(self, x: Tensor) -> Tensor: + return torch.sin(x) + + +class ConditionalMalicious(torch.nn.Module): + """Hides an unrecognised op (aten::sin) inside one branch of a conditional.""" + def forward(self, x: Tensor) -> Tensor: + if x.sum() > 0: + return torch.sin(x) + else: + return x + x + + +class ManyUnrecognisedOps(torch.nn.Module): + """Uses several different unrecognised ops to simulate an unexpected arch.""" + def forward(self, x: Tensor) -> Tensor: + a = torch.sin(x) + b = torch.cos(x) + c = torch.tan(x) + d = torch.exp(x) + return a + b + c + d + + +class FileReaderInSubmodule(torch.nn.Module): + """Hides the forbidden aten::from_file inside a submodule.""" + def __init__(self): + super().__init__() + self.reader = _FileReaderChild() + + def forward(self, x: Tensor) -> Tensor: + return x + self.reader(x) + + +class _FileReaderChild(torch.nn.Module): + def forward(self, x: 
Tensor) -> Tensor: + return torch.from_file("/tmp/secret", size=10) + + +# --- Sandbox2 attack models (PR #2873) --- +# +# These reproduce real-world attack vectors that exploit torch.as_strided +# to leak heap addresses and build ROP chains. The graph validator must +# reject them because as_strided (and several helper ops) are not in the +# transformer-architecture allowlist. + + +class HeapLeakModel(torch.nn.Module): + """Leaks heap addresses via torch.as_strided with a malicious storage offset. + + The attack scans the heap for libtorch pointers by reading out-of-bounds + memory through a strided view. Key unrecognised ops: aten::as_strided, + aten::item, aten::__rshift__. + """ + def find_heap_overlap(self, tensors: Tensor) -> tuple[list[int], Tensor]: + evil_length = 0x900 * 8 + leak = torch.as_strided( + tensors, [evil_length], [1], 2305843009213693952 - evil_length) + index: list[int] = [-1] + for i in range(evil_length - 1 - 5, -1, -1): + if (leak[i + 3].item() == leak[i + 5].item() + and leak[i + 3].item() >> 40 == 127): + if (leak[i + 2].item() >= 0x100000000 + and leak[i + 2].item() <= 0x100000005): + index.append(i) + return (index, leak) + + def forward(self, a: Tensor, b: Tensor, c: Tensor, d: Tensor) -> Tensor: + tmp: list[Tensor] = [] + heap_len = 200 + array_len = 0x900 + for i in range(heap_len): + tmp_values = [0xdead00000 + i] + [ + 0xdead00000 + j for j in range(2, array_len)] + tmp.append(torch.tensor(tmp_values)) + for i in range(heap_len): + indexs, leak = self.find_heap_overlap(tmp[i]) + indexs = indexs[1:] + if indexs == []: + continue + for index in indexs: + heap_addr = leak[index + 3].item() + libtorch_python_addr = leak[index + 1].item() + libtorch_cpu_addr = libtorch_python_addr - 0xc878b50 + assert 1 == 0, "leak: " + hex(libtorch_cpu_addr) + for i in range(heap_len): + print(tmp[i].shape) + return torch.zeros(0) + + +class RopExploitModel(torch.nn.Module): + """Builds a ROP chain to mprotect heap memory and execute shellcode. 
+ + Extends the heap-leak technique to overwrite function pointers and + chain gadgets that call mprotect followed by attacker-controlled + shellcode that writes files to disk. Key unrecognised ops: + aten::as_strided, aten::copy_, aten::Delete. + """ + def find_heap_overlap(self, tensors: Tensor) -> tuple[list[int], Tensor]: + evil_length = 0x900 * 8 + leak = torch.as_strided( + tensors, [evil_length], [1], 2305843009213693952 - evil_length) + index: list[int] = [-1] + for i in range(evil_length - 1 - 5, -1, -1): + if (leak[i + 3].item() == leak[i + 5].item() + and leak[i + 3].item() >> 40 == 127): + if (leak[i + 2].item() >= 0x100000000 + and leak[i + 2].item() <= 0x100000005): + index.append(i) + return (index, leak) + + def get_address_data(self, leak: Tensor, index: int, + tmp: list[Tensor], target_addr: int) -> int: + leak[index] = target_addr + return int(tmp[0].item()) + + def forward(self, a: Tensor, b: Tensor, c: Tensor, d: Tensor) -> Tensor: + tmp: list[Tensor] = [] + heap_len = 200 + array_len = 0x900 + for i in range(heap_len): + tmp_values = [0xdead00000 + i] + [ + 0xdead00000 + j for j in range(2, array_len)] + tmp.append(torch.tensor(tmp_values)) + for i in range(heap_len): + indexs, leak = self.find_heap_overlap(tmp[i]) + indexs = indexs[1:] + if indexs == []: + continue + for index in indexs: + heap_addr = leak[index + 3].item() + libtorch_python_addr = leak[index + 1].item() + libtorch_cpu_addr = libtorch_python_addr - 0xc878b50 + leak[index + 3] = int(heap_addr - 0x8) + for j in range(heap_len): + if tmp[j][2].item() == 0xDEAD00002 and i != j: + mprotect_got_addr = 0xc9785f0 + libtorch_cpu_addr + self.get_address_data( + leak, index + 3, tmp[j:j + 1], + int(mprotect_got_addr)) + del tmp[j] + assert 1 == 0, "exploit" + return torch.zeros(0) + for i in range(heap_len): + print(tmp[i].shape) + return torch.zeros(0) + + +# --- Generation logic --- + + +MODELS = { + "malicious_file_reader.pt": FileReaderModel, + "malicious_mixed_file_reader.pt": 
MixedFileReaderModel,
+    "malicious_hidden_in_submodule.pt": HiddenInSubmodule,
+    "malicious_conditional.pt": ConditionalMalicious,
+    "malicious_many_unrecognised.pt": ManyUnrecognisedOps,
+    "malicious_file_reader_in_submodule.pt": FileReaderInSubmodule,
+    "malicious_heap_leak.pt": HeapLeakModel,
+    "malicious_rop_exploit.pt": RopExploitModel,
+}
+
+
+def generate(output_dir: Path):
+    output_dir.mkdir(parents=True, exist_ok=True)
+    succeeded = []
+    failed = []
+
+    for filename, cls in MODELS.items():
+        print(f"  {filename}...", end=" ")
+        try:
+            model = cls()
+            model.eval()
+            scripted = torch.jit.script(model)
+            path = output_dir / filename
+            torch.jit.save(scripted, str(path))
+            size = path.stat().st_size
+            print(f"OK ({size} bytes)")
+
+            # Show ops for verification
+            graph = scripted.forward.graph.copy()
+            torch._C._jit_pass_inline(graph)
+            ops = sorted(set(n.kind() for n in graph.nodes()))
+            print(f"    ops: {ops}")
+
+            succeeded.append(filename)
+        except Exception as exc:
+            print(f"FAILED: {exc}")
+            failed.append((filename, str(exc)))
+
+    print(f"\nGenerated {len(succeeded)}/{len(MODELS)} models")
+    if failed:
+        print("Failed:")
+        for name, err in failed:
+            print(f"  {name}: {err}")
+    return len(failed) == 0
+
+
+if __name__ == "__main__":
+    out_dir = (Path(sys.argv[1]) if len(sys.argv) > 1
+               else Path(__file__).resolve().parent.parent
+               / "bin" / "pytorch_inference" / "unittest" / "testfiles" / "malicious_models")
+    print(f"Generating malicious model fixtures in {out_dir}")
+    success = generate(out_dir)
+    sys.exit(0 if success else 1)
diff --git a/dev-tools/run_es_inference_tests.sh b/dev-tools/run_es_inference_tests.sh
new file mode 100755
index 000000000..ebc65348b
--- /dev/null
+++ b/dev-tools/run_es_inference_tests.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0 and the following additional limitation. 
Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# + +# Runs Elasticsearch inference integration tests that exercise the +# pytorch_inference process via inference API default endpoints (ELSER, E5, +# rerank) and semantic text. Designed to run as a separate Buildkite step +# in parallel with run_es_tests.sh. +# +# Arguments: +# $1 = Where to clone the elasticsearch repo +# $2 = Path to local Ivy repo + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +exec "$SCRIPT_DIR/run_es_tests_common.sh" "$1" "$2" \ + ':x-pack:plugin:inference:qa:inference-service-tests:javaRestTest' \ + --tests 'org.elasticsearch.xpack.inference.DefaultEndPointsIT' \ + --tests 'org.elasticsearch.xpack.inference.TextEmbeddingCrudIT' \ + '---' \ + ':x-pack:plugin:inference:yamlRestTest' \ + --tests 'org.elasticsearch.xpack.inference.InferenceRestIT.test {p0=inference/30_semantic_text_inference/*}' \ + --tests 'org.elasticsearch.xpack.inference.InferenceRestIT.test {p0=inference/40_semantic_text_query/*}' diff --git a/dev-tools/run_es_tests.sh b/dev-tools/run_es_tests.sh index 97f619ca6..225aa78a2 100755 --- a/dev-tools/run_es_tests.sh +++ b/dev-tools/run_es_tests.sh @@ -10,174 +10,33 @@ # limitation. # -# Runs some Elasticsearch CI tests using C++ artifacts from a local Ivy repo. -# The elasticsearch fork and branch that are tested are based on the author -# and branches of the current PR, as recorded in the $PR_AUTHOR, -# $PR_SOURCE_BRANCH and $PR_TARGET_BRANCH environment variables. +# Runs the core Elasticsearch ML integration tests: native multi-node Java +# REST tests and the ML YAML REST tests. 
# -# This is designed to run on a BuildKite worker where all required versions of -# Java are installed in the BuildKite user's home directory. +# When ES_TEST_SUITE is set to "javaRestTest" or "yamlRestTest", only that +# suite is run. Otherwise both suites are run sequentially. # # Arguments: # $1 = Where to clone the elasticsearch repo # $2 = Path to local Ivy repo -set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Resolve the ml-cpp repo root before we cd away. -ML_CPP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" - -function isCloneTargetValid { - FORK_TO_CHECK="$1" - BRANCH_TO_CHECK="$2" - echo "Checking for '$BRANCH_TO_CHECK' branch at $FORK_TO_CHECK/elasticsearch" - if [ -n "$(git ls-remote --heads "git@github.com:$FORK_TO_CHECK/elasticsearch.git" "$BRANCH_TO_CHECK" 2>/dev/null)" ]; then - echo "Will use '$BRANCH_TO_CHECK' branch at $FORK_TO_CHECK/elasticsearch for ES integration tests" - return 0 - fi - return 1 -} - -SELECTED_FORK=elastic -SELECTED_BRANCH=main - -function pickCloneTarget { - - if isCloneTargetValid "$GITHUB_PR_OWNER" "$GITHUB_PR_BRANCH" ; then - SELECTED_FORK="$GITHUB_PR_OWNER" - SELECTED_BRANCH="$GITHUB_PR_BRANCH" - return 0 - fi - - if isCloneTargetValid "$PR_AUTHOR" "$PR_SOURCE_BRANCH" ; then - SELECTED_FORK="$PR_AUTHOR" - SELECTED_BRANCH="$PR_SOURCE_BRANCH" - return 0 - fi - - if isCloneTargetValid "$SELECTED_FORK" "$PR_SOURCE_BRANCH" ; then - SELECTED_BRANCH="$PR_SOURCE_BRANCH" - return 0 - fi - - if isCloneTargetValid "$SELECTED_FORK" "$PR_TARGET_BRANCH" ; then - SELECTED_BRANCH="$PR_TARGET_BRANCH" - return 0 - fi - - if isCloneTargetValid "$SELECTED_FORK" "$SELECTED_BRANCH" ; then - return 0 - fi - - return 1 -} - -pickCloneTarget - -cd "$1" -rm -rf elasticsearch -git clone -b "$SELECTED_BRANCH" "git@github.com:${SELECTED_FORK}/elasticsearch.git" --depth=1 -cd elasticsearch - -if [ -z "${BUILDKITE}" ]; then - export ES_BUILD_JAVA="$(grep "^ES_BUILD_JAVA" .ci/java-versions.properties | awk -F= '{ print 
$2 }' | xargs echo)" - if [ -z "$ES_BUILD_JAVA" ]; then - echo "Unable to set JAVA_HOME, ES_BUILD_JAVA not present in .ci/java-versions.properties" - exit 1 - fi - - # On aarch64: - # - openjdk is built with a 64KB page size - # - adoptopenjdk is built with a 4KB page size - # It's necessary to use use the one that matches the page size of the - # distribution that it's running on, which is: - # - 4KB for Ubuntu, Debian and SLES - # - 64KB for RHEL and CentOS - # There's a link "jdk" pointing to the appropriate JDK on each CI worker, - # so strip any specifics from what was specified in .ci/java-versions.properties. - if [ `uname -m` = aarch64 ] ; then - export ES_BUILD_JAVA=$(echo $ES_BUILD_JAVA | sed 's/.*jdk/jdk/') - fi - - echo "Setting JAVA_HOME=$HOME/.java/$ES_BUILD_JAVA" - export JAVA_HOME="$HOME/.java/$ES_BUILD_JAVA" -fi - -# For the ES build we need to: -# 1. Convince it that this is not part of a PR build, becuase it will get -# confused that the PR is an ml-cpp PR rather than an elasticsearch PR -# 2. Set GIT_BRANCH to point at the elasticsearch branch, not the ml-cpp branch -# 3. Set GIT_COMMIT to point at the elasticsearch commit, not the ml-cpp commit -# 4. Set GIT_PREVIOUS_COMMIT the same as GIT_COMMIT as there are no changes to -# Elasticsearch code in the current ML PR -unset ROOT_BUILD_CAUSE_GHPRBCAUSE -export GIT_BRANCH="$SELECTED_BRANCH" -export GIT_COMMIT="$(git rev-parse HEAD)" -export GIT_PREVIOUS_COMMIT="$GIT_COMMIT" - -IVY_REPO_URL="file://$2" - -INIT_SCRIPT="$ML_CPP_ROOT/dev-tools/gradle-build-cache-init.gradle" -GRADLE_CACHE_DIR="$HOME/.gradle/caches/build-cache-1" -CACHE_ARGS="" -if [ -f "$INIT_SCRIPT" ]; then - CACHE_ARGS="--build-cache --init-script $INIT_SCRIPT" -fi - -# Restore Gradle build cache from GCS if credentials are available. -# This lets ephemeral CI agents reuse compilation outputs from prior builds. 
-CACHE_KEY="gradle-build-cache-$(uname -m)" -GCS_CACHE_PATH="" -if [ -n "${GRADLE_BUILD_CACHE_GCS_BUCKET:-}" ] && [ -n "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]; then - GCS_CACHE_PATH="gs://${GRADLE_BUILD_CACHE_GCS_BUCKET}/${CACHE_KEY}.tar.gz" - if command -v gsutil &>/dev/null; then - if command -v gcloud &>/dev/null; then - gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS" 2>/dev/null || true - fi - echo "--- Restoring Gradle build cache from $GCS_CACHE_PATH" - mkdir -p "$GRADLE_CACHE_DIR" - if gsutil -q stat "$GCS_CACHE_PATH" 2>/dev/null; then - gsutil cp "$GCS_CACHE_PATH" /tmp/gradle-cache.tar.gz \ - && tar xzf /tmp/gradle-cache.tar.gz -C "$HOME/.gradle/caches/" \ - && rm -f /tmp/gradle-cache.tar.gz \ - && echo "Gradle build cache restored ($(du -sh "$GRADLE_CACHE_DIR" 2>/dev/null | cut -f1))" \ - || echo "Warning: failed to restore Gradle build cache, continuing without it" - else - echo "No cached Gradle build cache found, will build from scratch" - fi - else - echo "gsutil not found, skipping Gradle build cache restore" - fi -fi - -# ES_TEST_SUITE selects which test suite to run: -# javaRestTest - native multi-node integration tests only -# yamlRestTest - ML YAML REST tests only -# (unset/empty) - both suites sequentially (backward compatible) case "${ES_TEST_SUITE:-}" in javaRestTest) - ./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest $EXTRA_TEST_OPTS + exec "$SCRIPT_DIR/run_es_tests_common.sh" "$1" "$2" \ + ':x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest' ;; yamlRestTest) - ./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:yamlRestTest --tests "org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}" $EXTRA_TEST_OPTS + exec "$SCRIPT_DIR/run_es_tests_common.sh" "$1" "$2" \ + ':x-pack:plugin:yamlRestTest' \ + --tests 'org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}' ;; *) - 
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest $EXTRA_TEST_OPTS - ./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:yamlRestTest --tests "org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}" $EXTRA_TEST_OPTS + exec "$SCRIPT_DIR/run_es_tests_common.sh" "$1" "$2" \ + ':x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest' \ + '---' \ + ':x-pack:plugin:yamlRestTest' \ + --tests 'org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}' ;; esac - -# Upload Gradle build cache to GCS for future builds. -if [ -n "$GCS_CACHE_PATH" ] && [ -d "$GRADLE_CACHE_DIR" ] && command -v gsutil &>/dev/null; then - echo "--- Uploading Gradle build cache to $GCS_CACHE_PATH" - CACHE_SIZE=$(du -sm "$GRADLE_CACHE_DIR" 2>/dev/null | cut -f1) - if [ "${CACHE_SIZE:-0}" -gt 0 ] && [ "${CACHE_SIZE:-0}" -lt 4096 ]; then - tar czf /tmp/gradle-cache.tar.gz -C "$HOME/.gradle/caches/" build-cache-1 \ - && gsutil -o "GSUtil:parallel_composite_upload_threshold=50M" cp /tmp/gradle-cache.tar.gz "$GCS_CACHE_PATH" \ - && rm -f /tmp/gradle-cache.tar.gz \ - && echo "Gradle build cache uploaded (${CACHE_SIZE}M)" \ - || echo "Warning: failed to upload Gradle build cache" - else - echo "Skipping cache upload (size=${CACHE_SIZE:-0}M, expected 1-4095M)" - fi -fi diff --git a/dev-tools/run_es_tests_common.sh b/dev-tools/run_es_tests_common.sh new file mode 100755 index 000000000..9dcb5f850 --- /dev/null +++ b/dev-tools/run_es_tests_common.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. 
Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# + +# Common setup for running Elasticsearch integration tests using C++ artifacts +# from a local Ivy repo. Clones the appropriate elasticsearch fork/branch, +# configures the Java and Gradle environment, then executes the Gradle commands +# passed as remaining arguments. +# +# The elasticsearch fork and branch that are tested are based on the author +# and branches of the current PR, as recorded in the $PR_AUTHOR, +# $PR_SOURCE_BRANCH and $PR_TARGET_BRANCH environment variables. +# +# This is designed to run on a Buildkite worker where all required versions of +# Java are installed in the Buildkite user's home directory. +# +# Arguments: +# $1 = Where to clone the elasticsearch repo +# $2 = Path to local Ivy repo +# $3... = Gradle arguments. Multiple Gradle invocations are separated by +# a literal '---' argument. Each invocation's arguments are passed +# directly to ./gradlew (no eval/shell expansion), so callers must +# pass each token as a separate argument rather than embedding +# shell quoting inside a single string. 
+ +set -e + +function isCloneTargetValid { + FORK_TO_CHECK="$1" + BRANCH_TO_CHECK="$2" + echo "Checking for '$BRANCH_TO_CHECK' branch at $FORK_TO_CHECK/elasticsearch" + if [ -n "$(git ls-remote --heads "git@github.com:$FORK_TO_CHECK/elasticsearch.git" "$BRANCH_TO_CHECK" 2>/dev/null)" ]; then + echo "Will use '$BRANCH_TO_CHECK' branch at $FORK_TO_CHECK/elasticsearch for ES integration tests" + return 0 + fi + return 1 +} + +SELECTED_FORK=elastic +SELECTED_BRANCH=main + +function pickCloneTarget { + + if isCloneTargetValid "$GITHUB_PR_OWNER" "$GITHUB_PR_BRANCH" ; then + SELECTED_FORK="$GITHUB_PR_OWNER" + SELECTED_BRANCH="$GITHUB_PR_BRANCH" + return 0 + fi + + if isCloneTargetValid "$PR_AUTHOR" "$PR_SOURCE_BRANCH" ; then + SELECTED_FORK="$PR_AUTHOR" + SELECTED_BRANCH="$PR_SOURCE_BRANCH" + return 0 + fi + + if isCloneTargetValid "$SELECTED_FORK" "$PR_SOURCE_BRANCH" ; then + SELECTED_BRANCH="$PR_SOURCE_BRANCH" + return 0 + fi + + if isCloneTargetValid "$SELECTED_FORK" "$PR_TARGET_BRANCH" ; then + SELECTED_BRANCH="$PR_TARGET_BRANCH" + return 0 + fi + + if isCloneTargetValid "$SELECTED_FORK" "$SELECTED_BRANCH" ; then + return 0 + fi + + return 1 +} + +CLONE_DIR="$1" +IVY_REPO_PATH="$2" +shift 2 + +pickCloneTarget + +cd "$CLONE_DIR" +rm -rf elasticsearch +git clone -b "$SELECTED_BRANCH" "git@github.com:${SELECTED_FORK}/elasticsearch.git" --depth=1 +cd elasticsearch + +if [ -z "${BUILDKITE}" ]; then + export ES_BUILD_JAVA="$(grep "^ES_BUILD_JAVA" .ci/java-versions.properties | awk -F= '{ print $2 }' | xargs echo)" + if [ -z "$ES_BUILD_JAVA" ]; then + echo "Unable to set JAVA_HOME, ES_BUILD_JAVA not present in .ci/java-versions.properties" + exit 1 + fi + + # On aarch64: + # - openjdk is built with a 64KB page size + # - adoptopenjdk is built with a 4KB page size + # It's necessary to use use the one that matches the page size of the + # distribution that it's running on, which is: + # - 4KB for Ubuntu, Debian and SLES + # - 64KB for RHEL and CentOS + # There's a link "jdk" 
pointing to the appropriate JDK on each CI worker, + # so strip any specifics from what was specified in .ci/java-versions.properties. + if [ `uname -m` = aarch64 ] ; then + export ES_BUILD_JAVA=$(echo $ES_BUILD_JAVA | sed 's/.*jdk/jdk/') + fi + + echo "Setting JAVA_HOME=$HOME/.java/$ES_BUILD_JAVA" + export JAVA_HOME="$HOME/.java/$ES_BUILD_JAVA" +fi + +# For the ES build we need to: +# 1. Convince it that this is not part of a PR build, because it will get +# confused that the PR is an ml-cpp PR rather than an elasticsearch PR +# 2. Set GIT_BRANCH to point at the elasticsearch branch, not the ml-cpp branch +# 3. Set GIT_COMMIT to point at the elasticsearch commit, not the ml-cpp commit +# 4. Set GIT_PREVIOUS_COMMIT the same as GIT_COMMIT as there are no changes to +# Elasticsearch code in the current ML PR +unset ROOT_BUILD_CAUSE_GHPRBCAUSE +export GIT_BRANCH="$SELECTED_BRANCH" +export GIT_COMMIT="$(git rev-parse HEAD)" +export GIT_PREVIOUS_COMMIT="$GIT_COMMIT" + +IVY_REPO_URL="file://$IVY_REPO_PATH" + +ML_CPP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +INIT_SCRIPT="$ML_CPP_ROOT/dev-tools/gradle-build-cache-init.gradle" +GRADLE_CACHE_DIR="$HOME/.gradle/caches/build-cache-1" +CACHE_ARGS=() +if [ -f "$INIT_SCRIPT" ]; then + CACHE_ARGS=("--build-cache" "--init-script" "$INIT_SCRIPT") +fi + +# Restore Gradle build cache from GCS if credentials are available. +# This lets ephemeral CI agents reuse compilation outputs from prior builds. 
+CACHE_KEY="gradle-build-cache-$(uname -m)" +GCS_CACHE_PATH="" +if [ -n "${GRADLE_BUILD_CACHE_GCS_BUCKET:-}" ] && [ -n "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]; then + GCS_CACHE_PATH="gs://${GRADLE_BUILD_CACHE_GCS_BUCKET}/${CACHE_KEY}.tar.gz" + if command -v gsutil &>/dev/null; then + if command -v gcloud &>/dev/null; then + gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS" 2>/dev/null || true + fi + echo "--- Restoring Gradle build cache from $GCS_CACHE_PATH" + mkdir -p "$GRADLE_CACHE_DIR" + if gsutil -q stat "$GCS_CACHE_PATH" 2>/dev/null; then + gsutil cp "$GCS_CACHE_PATH" /tmp/gradle-cache.tar.gz \ + && tar xzf /tmp/gradle-cache.tar.gz -C "$HOME/.gradle/caches/" \ + && rm -f /tmp/gradle-cache.tar.gz \ + && echo "Gradle build cache restored ($(du -sh "$GRADLE_CACHE_DIR" 2>/dev/null | cut -f1))" \ + || echo "Warning: failed to restore Gradle build cache, continuing without it" + else + echo "No cached Gradle build cache found, will build from scratch" + fi + else + echo "gsutil not found, skipping Gradle build cache restore" + fi +fi + +# Build the base arguments array shared by every Gradle invocation. +BASE_ARGS=() +# shellcheck disable=SC2086 +BASE_ARGS+=($GRADLE_JVM_OPTS) +BASE_ARGS+=("${CACHE_ARGS[@]}") +BASE_ARGS+=("-Dbuild.ml_cpp.repo=$IVY_REPO_URL") + +run_gradle() { + local cmd_args=("$@") + # shellcheck disable=SC2086 + ./gradlew "${BASE_ARGS[@]}" "${cmd_args[@]}" $EXTRA_TEST_OPTS +} + +# Callers separate multiple Gradle invocations with '---'. +GRADLE_ARGS=() +for arg in "$@" ; do + if [ "$arg" = "---" ]; then + if [ ${#GRADLE_ARGS[@]} -gt 0 ]; then + run_gradle "${GRADLE_ARGS[@]}" + GRADLE_ARGS=() + fi + else + GRADLE_ARGS+=("$arg") + fi +done +if [ ${#GRADLE_ARGS[@]} -gt 0 ]; then + run_gradle "${GRADLE_ARGS[@]}" +fi + +# Upload Gradle build cache to GCS for future builds. 
+if [ -n "$GCS_CACHE_PATH" ] && [ -d "$GRADLE_CACHE_DIR" ] && command -v gsutil &>/dev/null; then + echo "--- Uploading Gradle build cache to $GCS_CACHE_PATH" + CACHE_SIZE=$(du -sm "$GRADLE_CACHE_DIR" 2>/dev/null | cut -f1) + if [ "${CACHE_SIZE:-0}" -gt 0 ] && [ "${CACHE_SIZE:-0}" -lt 4096 ]; then + tar czf /tmp/gradle-cache.tar.gz -C "$HOME/.gradle/caches/" build-cache-1 \ + && gsutil -o "GSUtil:parallel_composite_upload_threshold=50M" cp /tmp/gradle-cache.tar.gz "$GCS_CACHE_PATH" \ + && rm -f /tmp/gradle-cache.tar.gz \ + && echo "Gradle build cache uploaded (${CACHE_SIZE}M)" \ + || echo "Warning: failed to upload Gradle build cache" + else + echo "Skipping cache upload (size=${CACHE_SIZE:-0}M, expected 1-4095M)" + fi +fi diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 916d929bc..15e49d52a 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -36,6 +36,7 @@ === Enhancements +* Harden pytorch_inference with TorchScript model graph validation. (See {ml-pull}2936[#2936].) * Better handling of invalid JSON state documents (See {ml-pull}[]#2895].) * Better error handling regarding quantiles state documents (See {ml-pull}[#2894]) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5e571c729..b4d0ea821 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -57,6 +57,14 @@ else() set(_build_type_arg "") endif() +# Common arguments for the pytorch_inference allowlist validation script. 
+set(_validation_args + -DSOURCE_DIR=${CMAKE_SOURCE_DIR} + -DVALIDATE_CONFIG=${CMAKE_SOURCE_DIR}/dev-tools/extract_model_ops/validation_models.json + -DVALIDATE_PT_DIR=${CMAKE_SOURCE_DIR}/dev-tools/extract_model_ops/es_it_models + -DVALIDATE_VERBOSE=TRUE +) + add_custom_target(test_all_parallel DEPENDS build_tests COMMAND ${CMAKE_COMMAND} @@ -66,3 +74,17 @@ add_custom_target(test_all_parallel -P ${CMAKE_SOURCE_DIR}/cmake/run-all-tests-parallel.cmake WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) + +# Standalone target for the pytorch_inference allowlist validation. +# This creates a Python venv and may download HuggingFace models, so it +# is deliberately kept separate from precommit and test_all_parallel to +# avoid unexpected network/download side-effects. Run it explicitly: +# cmake --build -t validate_pytorch_inference_models +# See dev-tools/extract_model_ops/README.md for details. +add_custom_target(validate_pytorch_inference_models + COMMAND ${CMAKE_COMMAND} + ${_validation_args} + -P ${CMAKE_SOURCE_DIR}/cmake/run-validation.cmake + COMMENT "Validating pytorch_inference allowlist against HuggingFace models and ES integration test models" + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} +) From 251169fe0e633b436802675b9682d7ccb5c840ee Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 23 Mar 2026 09:23:34 +1300 Subject: [PATCH 2/5] [ML] Add aten::norm to graph validator allowlist The prepacked .multilingual-e5-small model uses aten::norm for normalization, which was not in the allowlist. This caused the model to be rejected with "Unrecognised operations: aten::norm". 
Made-with: Cursor --- bin/pytorch_inference/CSupportedOperations.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc index 3ecd4bd47..5e3921ec8 100644 --- a/bin/pytorch_inference/CSupportedOperations.cc +++ b/bin/pytorch_inference/CSupportedOperations.cc @@ -35,7 +35,8 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERA // google/mobilebert-uncased, xlm-roberta-base, elastic/bge-m3, // elastic/distilbert-base-{cased,uncased}-finetuned-conll03-english, // elastic/eis-elser-v2, elastic/elser-v2, elastic/hugging-face-elser, -// elastic/multilingual-e5-small-optimized, elastic/splade-v3, +// elastic/multilingual-e5-small-optimized, .multilingual-e5-small (prepacked), +// elastic/splade-v3, // elastic/test-elser-v2, .rerank-v1 (Elastic rerank model), // distilbert-base-uncased-finetuned-sst-2-english, // sentence-transformers/all-distilroberta-v1. @@ -99,6 +100,7 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI "aten::ne"sv, "aten::neg"sv, "aten::new_ones"sv, + "aten::norm"sv, "aten::ones"sv, "aten::pad"sv, "aten::permute"sv, From 29f317a3c98790f70cb8b0766d1983e16e8cc088 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 23 Mar 2026 09:29:19 +1300 Subject: [PATCH 3/5] [ML] Add multilingual-e5-small model ops to reference files Extracted ops from intfloat/multilingual-e5-small (base and Eland text_embedding variant) and added both to the reference golden file. The base model uses standard XLM-RoBERTa ops. The Eland variant adds pooling/normalization ops (linalg_vector_norm, clamp, etc.). The prepacked .multilingual-e5-small model bundled with Elasticsearch uses aten::norm (added to the allowlist in the previous commit). 
Made-with: Cursor --- bin/pytorch_inference/CSupportedOperations.cc | 4 +- .../testfiles/reference_model_ops.json | 73 +++++++++++++++++++ .../extract_model_ops/reference_models.json | 1 + 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc index 5e3921ec8..250b86f79 100644 --- a/bin/pytorch_inference/CSupportedOperations.cc +++ b/bin/pytorch_inference/CSupportedOperations.cc @@ -35,8 +35,8 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERA // google/mobilebert-uncased, xlm-roberta-base, elastic/bge-m3, // elastic/distilbert-base-{cased,uncased}-finetuned-conll03-english, // elastic/eis-elser-v2, elastic/elser-v2, elastic/hugging-face-elser, -// elastic/multilingual-e5-small-optimized, .multilingual-e5-small (prepacked), -// elastic/splade-v3, +// elastic/multilingual-e5-small-optimized, intfloat/multilingual-e5-small, +// .multilingual-e5-small (prepacked), elastic/splade-v3, // elastic/test-elser-v2, .rerank-v1 (Elastic rerank model), // distilbert-base-uncased-finetuned-sst-2-english, // sentence-transformers/all-distilroberta-v1. 
diff --git a/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json b/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json index 11ce46670..30e985582 100644 --- a/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json +++ b/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json @@ -933,6 +933,79 @@ "prim::ListConstruct", "prim::NumToTensor" ] + }, + "multilingual-e5-small": { + "model_id": "intfloat/multilingual-e5-small", + "quantized": false, + "ops": [ + "aten::Int", + "aten::ScalarImplicit", + "aten::__and__", + "aten::add", + "aten::arange", + "aten::contiguous", + "aten::dropout", + "aten::embedding", + "aten::expand", + "aten::gather", + "aten::ge", + "aten::gelu", + "aten::index", + "aten::layer_norm", + "aten::linear", + "aten::new_ones", + "aten::reshape", + "aten::scaled_dot_product_attention", + "aten::select", + "aten::size", + "aten::slice", + "aten::tanh", + "aten::to", + "aten::transpose", + "aten::unsqueeze", + "aten::view", + "prim::Constant", + "prim::DictConstruct", + "prim::GetAttr", + "prim::ListConstruct", + "prim::NumToTensor" + ] + }, + "multilingual-e5-small-eland": { + "model_id": "intfloat/multilingual-e5-small", + "quantized": false, + "eland_task_type": "text_embedding", + "ops": [ + "aten::Int", + "aten::add", + "aten::add_", + "aten::cat", + "aten::clamp", + "aten::clamp_min", + "aten::div", + "aten::embedding", + "aten::expand", + "aten::expand_as", + "aten::gelu", + "aten::layer_norm", + "aten::linalg_vector_norm", + "aten::linear", + "aten::masked_fill", + "aten::mul", + "aten::reshape", + "aten::scaled_dot_product_attention", + "aten::size", + "aten::slice", + "aten::sub", + "aten::sum", + "aten::to", + "aten::transpose", + "aten::unsqueeze", + "aten::view", + "prim::Constant", + "prim::ListConstruct", + "prim::NumToTensor" + ] } } } diff --git a/dev-tools/extract_model_ops/reference_models.json b/dev-tools/extract_model_ops/reference_models.json index 52556c2af..e2f270d35 100644 --- 
a/dev-tools/extract_model_ops/reference_models.json +++ b/dev-tools/extract_model_ops/reference_models.json @@ -15,6 +15,7 @@ "elastic-elser-v2": "elastic/elser-v2", "elastic-hugging-face-elser": "elastic/hugging-face-elser", "elastic-multilingual-e5-small-optimized": "elastic/multilingual-e5-small-optimized", + "multilingual-e5-small": "intfloat/multilingual-e5-small", "elastic-splade-v3": "elastic/splade-v3", "elastic-test-elser-v2": "elastic/test-elser-v2", "distilbert-sst2": "distilbert-base-uncased-finetuned-sst-2-english", From 37c10c1000764e964fa7750e070439340f5c3195 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 23 Mar 2026 09:58:04 +1300 Subject: [PATCH 4/5] [ML] Add graph validator test for prepacked e5 model with aten::norm The prepacked .multilingual-e5-small model uses aten::norm, which was missing from the allowlist and caused production failures. This test loads a tiny (24KB) model that mirrors the real prepacked model's graph structure (including aten::norm) and verifies graph validation passes. The test model was created by tracing a minimal XLM-RoBERTa-like architecture with normalization, then patching the TorchScript IR to use aten::norm (which modern PyTorch decomposes into aten::linalg_vector_norm, so it can't be generated via tracing). 
Made-with: Cursor --- .../unittest/CModelGraphValidatorTest.cc | 24 ++++++++++++++++++ .../unittest/testfiles/e5_with_norm.pt | Bin 0 -> 24434 bytes 2 files changed, 24 insertions(+) create mode 100644 bin/pytorch_inference/unittest/testfiles/e5_with_norm.pt diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc index 7818e88f0..351f59276 100644 --- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc +++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc @@ -435,6 +435,30 @@ BOOST_AUTO_TEST_CASE(testMaliciousRopExploit) { BOOST_REQUIRE(hasForbiddenOp(result, "aten::as_strided")); } +// --- Prepacked model compatibility tests --- +// +// These load TorchScript models that mirror the ops used by Elasticsearch's +// prepacked models (ELSER, E5, rerank). If a new op appears in a prepacked +// model that isn't in the allowlist, these tests will catch it before CI +// integration tests or production deployments. + +BOOST_AUTO_TEST_CASE(testPrepackedE5ModelWithNorm) { + // The prepacked .multilingual-e5-small model uses aten::norm for L2 + // normalization. This op was missing from the allowlist and caused + // production failures (the process exited with "Unrecognised operations: + // aten::norm"). This test model is a tiny (24KB) architecture-compatible + // replica with the same graph ops as the real 448MB prepacked model. + auto module = ::torch::jit::load("testfiles/e5_with_norm.pt"); + auto result = CModelGraphValidator::validate(module); + + BOOST_REQUIRE_MESSAGE(result.s_IsValid, + "e5_with_norm.pt should pass validation but failed. 
" + "Forbidden: " << result.s_ForbiddenOps.size() + << ", Unrecognised: " << result.s_UnrecognisedOps.size()); + BOOST_REQUIRE(result.s_ForbiddenOps.empty()); + BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); +} + // --- Allowlist drift detection --- // // Validates that ALLOWED_OPERATIONS covers every operation observed in diff --git a/bin/pytorch_inference/unittest/testfiles/e5_with_norm.pt b/bin/pytorch_inference/unittest/testfiles/e5_with_norm.pt new file mode 100644 index 0000000000000000000000000000000000000000..a814f7bbd2a3b8fbed9f83511b6ea3e54e9aace4 GIT binary patch literal 24434 zcmb@t2{=_>+dqCXPi0KThz!Y0#0BbyA6l ziZWDEBq@;wit2y9&-8o0&-47=|8u?XZ(rBC_O>ou{Y(#G$TEZmT(a@!r3_%bR~F_8b2FlPjPSwZ-FWEnMJUxC zkH0^z0DJOJL21EasD5t(XE$cRof#iF?!d*707%RnLQ-8Y$9}X$<_) z>J_kkdIh?E%Y&l*4bXSPEijQ2j`PWCk{)gb4a0|lUuqrp4Ec#?&nZI|A&4(e)Whcz z=1|OX6`8^DnbZ>+MI0S^sAhL5T{^TI5~)?-yvtKEv*HZ23VICR9-+X_&Ck)}RRWAR z9k&R#ZOdTBi#_nsct5C#zlm(Lo)G2AN1(P;6WGuD4v$=&N2p=|dJKm{Tjxb=a#t2x zZ79ZN%L0%^;0g$uXMzdV4SnkBXvkHU_<8gS>ai}0oS2(*8ZgPqxqf}0$9_*TqQGR{B+ znC34*s#_1R)$zc&v3sFr_B7DbJp(hi?_p}rF_bp?5TA6thmRWy@NribETN5q zsKvvWvbh#Nd!md}It=j_kK5So=|1#L^bxWQl)~FzJOD3GuEB52L-3AUm*EW#G1%K` zjLi(|p_3;!ob~Gi4+ok+f$SZ`=sSir@J}MxXFtImmjG4H-6Rg1IOBG=Aly6g6tpXw z!MhRn!1~M>AbNTMr2jqz4BDgM+sH|9hNg*Q9Hv3=#rLq1rxa3!GVtm!d35PZ7``D~ zP3kPAKo9mUFyqJoUR7TJep#2n+DC8TvPV+zo(2scU-=BM2p@){S3GgiD+ioBa}hTM zD_|a}ZsfQ;1-`X?N@mA$(UK+(Le4w}-GDa_Umf6s3Q@zz>obI*cR#>@!wNLt);s8z z{~!?Zb_7|wpJC%o2(LNR2KJv@0c-kmNGm5zs59XR8Feh6KGy>)pxbEiyBzlI+X&=| zK0J1Q7?`OHfMXjE;r08*vE-Eu7-D6FbM`vonq5}`|0iA?wYCXe`>2A#m0sg-22Y{4 zp$J+uJptsR&Le>-RjkDwjbC(jVDlTda1oskz5?HX=(03&chomu^HqIuY9b8U{9SeN47%FH-FS_5c%!8vN2*4C@xzfRz`Y;F{X4u#uH1Lyclc zo?Q#;r@XP;!CZLp+k5AZ2BgNe=YJ8 z{DR^*;Yb=vW3fTo=$m09T^~fdz`YHoFmWsqTCkqOdvyYEWbH-V-uxO4 ze_0QEew>DS)z#1=bPz4t20*=c1dO+u1OYl;SXea`Z_;rxeL3k!{F`hd!f0-L3sBW0WXyFp;L7o;BJgN400JjxhIt2B2NU6 zm#@MRHYEH#;e<6hyU?x^n%Hd72(5Wvh$aV@(~8+MaY3^TQZZBm9AW3-(qR#XJbN0Z 
z<}%Pb?jsN!*^Vw<;9}h090!jju*1Tm8SvUkcU(}h8ho-WL9BuY@I=%&;B?9Yk9S{! z`V&Lg#k(II^ALo)-3##jkG8m?kHX+qTTM$e^u(SYzoH8nnLzi|DHzRl1kMJl;|Gi@ z_{Y6MICX}GDW&hp<8gzK{l*RY(R>E9x~h+dy4ZkZ5Car`5`;!-FYvDe->{BuJ294* zjP69;2M_M7#1|cTsb7;0z)gWl2Ky z_8}tpbt6#l-GEJZ&jEct2;+OCpkKuoIAksXf=oq-ou27rg90z3^vMc5kl=-@V}yX+ zfolA%d=3fSIYYGCAHk>hPs8Gg%`of7Z5WiY8Kww#z|jvK_(Swtw3Dk686IUpWv<+e zD<tjTQpfIbM}$qAtVF?nV%k;_onX$r4gyt!ZbEcJB7ySa``iGt1=Id z?A5{Dm$;$q$uLMUOUL%AzGTzKYoJ+GmkctKz-N6s;i&=xG*Hlswy8cx_qzxv`&E!( zyF&u+3w48Ut}tM4We2{{kwS#5EX7tWl2EWXng|;-rJjCv5VE$hGg8<>@wo386c8#P zHkZ5xZI9HjWx5MCn#;wWLs78!N-Avm{t%s0DZ)RKb8wML7*xyV_qG2=GG}J`qT|SV3z_(eQU9N{Try`K7eP;TCgs*#~+TA;n(k%!)*CO zD6l0N)_88mEdzx(`~^QM%5#AOAQfBE9C4Cp5;iDwB|{f%fx*&KAlYxm&}_=U9W#~C z?e#-krK!ZonDYc$tCOKl5C!tA|4J+~M`#6i48G5nMmB8YL1TG_IFM}zEWZ$p9o&=9 z_rY|qSjCPX_;o?cKr_T&%89&=+(sqB5Z*4@0eR~gxYSmkO2#VTXNs56l`LP#&^Un& zJ=&o@uQOElHADx|eH^so7HMC)mK6561TZ)X^q(i&zCsnS`GO8ErtH9( zmQqk9Nd}uVi4hm=5e{3e0AImJZ%duHy(o~ zCw=jj@DwzjQwp4{*2288lQ<*vJapne4U?;%px`Jwd@QsTUep)`>x$+nqmho82|Q@3jkfRA#y6w-f#bYCHj_C5 zYu`q~nRWVbChIhg6O6}&#bMC|p!b3DmsrF}!!X1+C7s zg_qQVF?pmNsvf?LlZ$f^5#9|{-YR1GM;y>At_62(RVNI?v|yO8Gsrrr1CO0wiwol) zA^kWb(4(5-mo%@8yr;``SIoJFYz0})CQzyent&{~l`pd42S4)&kL3qFN#c;_c9xAqw-o>hc(H?HF2DcOKw zatN>fNul)$Y=T2VtPE~tJ$#YtK8p6gi0^BBK=&l2;W{rCxWz~V75lQIFRT6F?{y{U zed#w)!Nr4zG`2ur$)otob!D)|rxPKoZ*8=hvzl`&Gx571!YheceEhPG*5Li!IKsopx-QXe-hxAKe zcO?>@IN<*Js&g#|+U%{a{~4Sv#QhjQEB z!-g>vY$}rio37NL_^+8*TCNCYWL9EHUJ0DS8;*ZpWrYvN4&l^l1vscvkrX~ngLgC5m;sRD3)PyMhpcvTqAe^e9h{^ zG$B8%CVUW{!JLeRb(zrMj6MAE^BQV8*MUpVzW}u$d{%(x4brsBR@CGR5FWru`!C4r0sQ0qAByB9~97J+|-9 z!M_w*fYFC`%+2!*{%F|Bv~>^%b@0=yJ1JP}iysbZHh}|&hoOVS1UUG43cnP51R%^r z#O-xBS%(4LlqzAJZ537zsD}40^Mlo=*h32ej_-&0Zb{mt% zdonzcgLfHHKWmMf66+vkWj1`=XoxqqGz0$874V`?G@Obzf}bRMsh#2O@JophmVbH= zb=i)Sk0?X9eP<4m$WDg4U%9|TRS@b(Jb*JLg_t{S0b6hooGQ_pAGFat6HP~Er6&_e2iH}^pi_|RpKq`~-v)A}S zE+#iCF)GoBv=}Z?It3iFHI1i#-p6W@O0*MGa&U_q7h|Pt1om4M)B4d#9Uc#~fdZB^ 
z;3$y~6rOnCQwr)>Y3%_1Ss=g~w{~sD30=PE7$4CDo`|<}%zxiGz)2WMED;0|s#1p`R8zLRfG= zfWb`{vCRBiV)Nn!TrFFI#m8!~&tWdczQHRvo#!1CGtL5I{A^4v^Z7p9H+7uww%=7^@|gw}C$7-r5)*OUg*ue6 z`UF%-S%(jDe=;t1+5oJEqk&&qA8v4%0p3gT;QTc{{EHH-Ik9 znc;8~0tHzM;;y5&@yXyUxZv}SINF_u0}^wfv)x_99;=O=WMd$VJcStn>>x*hi_z1u z0{XrC0HdtiiOlNLAoll49K$#Q_NUCFb7@Rx?a4db%A-WvI9g6jX1M^WO9fn?M1!7A z<0$BQI+{1Df-jWL;D{4j0Oj2T`0^wVCqH)tIi@o>UP%N$n$84S%KQu&B}XhClnK6W z3j?NMvQR4f5H4{&3sYeS-f>zT?!TFVld{ba*B~#$WnVH<(IfGm0C8-mb`Xy2uYl_N zAs#_bq3?M|*u+_lYOip?p?oW_$u|-IUbX_>f8h>qyiNp(r+dgeOF>|vlnDzt!hqUR zGrk&fj=1A|0+GXcutIqloH*$Y3o{SGeOof1`c_tand=~K;*5Z{n}1?9jSW~Kq6#;! z4ul574*lI zN5BU-6233!hw+m`_;$xFa*DqiJ?Plw|n7X@8zo&pRvnKbTdU z3F?1!{CU=XLfW44$Bq%R{WtHbv-aLf6ec=I9qrtMyiSDyT5vBZG^`2e=Tv~00fiWR zp@tr>)chXHy+q zZ2)b2ns}%cMOX&#gNJE}gtECasxOR3io6Nn!>gA>W{414+YkuW-t-1uyEH)gyUpl6 z3l#-OtpZ%~Wu)_tEx_j;D=I1B0FNa#K%Uc1<)AAeB-5yc%d?wI^xow-Hg5y`%u2D#}(h1q~z1K(28q$>5)9 zWxNq5e3B26hwldhhhAwUr|tu&OOC*SLj#m-upkn^5>ZpUNa#+fBDScn#Pd5!=(QaW zVn`RY*1-w-=u2CAc8C!W+rv&!!faY|A4dX-IxfI>QyGcPvXduDlaRB4Ke>lh4mosq z0t+>F)L`|7k_@vVtjZaf!hWVZsnj$i?~jXiWbJsA)@{FKmax=puJ*oP=; z%|uEZ2l!DW59)#!N!w$lh{k=BuCC67<|@`ApS}^Y&PE3%<*S0VM|Hsn%@53tSRvZT z5Z%jC4X~udfRG_?5Ei=!2naRO>w23A59Knt?4m!(*!hgA-ERgMtv*PX{TJD&vkNrc zlpsc3&J(h0vItEn3BpWxk!)JdiB`Nc1ImOVN$J@~N;r#w*#|tRJSqfP)SH1OC06vs z#}!c>UBJj!KOiRW1AN9}LI2lU!cyD|q|})Lu3HC*e2zp=@G6yb{we z3Y`ei5CDVpP%wL0kt})joGQCl1B^WKLH&~YQ5eZlr&bD$aTjJ60Fq7prljE!6l{>&O))Mf`7lQjNl z{|#&Co@i%9qMn@Sgpm_+wGaVjJpsUHCWolJluCNonxLoFdqLQy!-U<2+ho`QWzspk zoiu1^C2F=uqU59!^3SuLlV2YBA8^%%lBGRlZ0>ckoG%V7MAQ<>1y2Z{*h(Vovj_29 zWgj|%hDpsx3nGbs4PfgmC(6~N0AqUqIW8=NN@O|d@iEKLhwKgl)d>NCsvktbN>{+6 zM+I|_f&kk-elYB24pa#fP?YYC?kMda#N6%NnF{F*;o1ma0=YA11g$GoM z>iO1)Is!=8K;X=Kl~ne9K#U#`Cc`8gfuG4LaEFrug11bQ50`TRp-p?i^AEMei52F6 zvXatz=L;ViJ;qIP#9E-bm;rjp13!Y&xsH4+l?WELiXhq+X+S;loOC=yK~d$-Ah~0J znta8LPOAwcM9KkToP19-jRcYbsj5V=5&?M1;y|}jKUvJ!160!*iGmBlgu{D9Fu7d- zR7_AoK>mI*p?D=)$j~H`ox90+4=FUcX$_J4YzN@cv;{S%l8~n7dy*@cpKy+lAV0LM 
z1I~mvNM`Y+_kX%fF0?L?Yu_n=If;1CWUT@ynKX3g;2A>f^?DGy$qbdK?Pu!KJ+eW$ zl}Hvc2ZI42h_e0@HMs2o;jwFxSlF};c&%V&J|-^`6gC&5rP@*=GnTnF#JZa8>Ow$C zW)u=(TqH)XSAr?TDHXbyQgEijqIBB`9C+(w8o6 z23~BO$ZlekuKDE^vBy0D(P-nwNtet(^0yh|IqnrG^YJKA>7$1@%!r5*uN)eYs3rFoiz#&0oB&~vc1eZv$r-P(Hg;D27iS9I>|mCVhe6d1NwKUt$NMyswki&hP$-HLg~l zE&GF~{%HRVSLx+l5JX2d#1ekJ9LVwhed4maABdgqB)&ZrMzcIhWbSk}Vb*k(bj=J! zY?~s08>5+sB7A`CbOLDEX!=hK845dd{{yblr${-YV1t!h5kqb31NXNI#cRZd$Ay_q;&uV$!e>c6K_BpOuLdgk&`mb2p_9Yv`hemR z21=?}Ny<+%dh)4H{bHKGZZpnniteZ z_@W6lDb(@U76r^^kcZ_Yk;F7WQfE#OM}DvnQFR3*jdjr2EKnT99}oi8CB7(gN(H%I zrlQP+Ug8%sizvG=NqY1=qL+Q*LosJoqBe`oph%P(J-m+Txx3;~K)MyE6D=d{Vy6h@ z6iHB*>qOM7*@mXfb&$1~GKwF!2jw1OsN$_MkaZBHI=_q{eh>iM38*A;d4)+veg?JJ zF99UwNP@gn0wlXe8Ry@yL8D2f1mE)AKobWezMIV?i?cVVe4|g)UfTg0PZ*)Rp#h?D z*>CcW*auR1E{fy=6J+j3lE{~2`ajyI=>q3xsLorXh*7qch--y2NZ@E8-Dow;%wGap zSez%1ESVvSXB*YkekW*R2%ucDhS+`-pqc|Sq*N>-M7S{VF~|hOj;)q!1JKE0Jot z2k5yG2rA=e$)!48Fy0%09=?-90V8b0^8sTJ>v)?Oboxys+t`BKW~PnLenAxScp?}=ec}7eM9zbrV znHlaeVNjIaOfu@kiH9Crfx3(om`rg2lOp-#p5%Ri8n8szaVJv`+sPu2#xq1&9}C!i zR|QBN@c_xbJ;qC{nM7H&6$mpPB58{(=*Jv0rp=KB@r81L!Z>5#Grb+%ku?O#8yiXM zAr+AMx|lFI!vcgjcAcOGh4xPX3dG_96{zT1{1Su)j>+nI%XWGO{eL16O=Z33!9P! 
zqMureG?mtqG$;>bzwi*Lv4&t&=s2~+)fEhjt0TE0ZO~%I2Y$#%gOXMTk!);DI>YZ| z!#gYF9JvhCd8ASk9@mliiYq~Knl;%_P(ldsWDr8<5)t39DQJ3Sg($6VAXh|%$pKWl1z5EEqB~1`fL_u|e!QlG zgobv5Hm1F5C}GCj^NyhAnJ98z_KS)4Q_F1QG3tU%JXyK%0QjP)2q;qPTLb)}P<*)x z5-TN$6V(Rj@;at}Kk$?2S29CUM(*I~Hc7DH{+y8AFhs3&Fh}D;gJf+)9H0oM8OIB{ z1I^dUWX+2s1fU;4OZiucf`@`+d2S>!h+@W+Hv-5HpW6t#vjzDz9QFBYKV{qd(<;v03O$E zLdnin^vgUBX!Zm<8r)h(EbXHKTHq;T4`B(ADii@qGW3zp0TV=_gg1-PwxN1`mVfea zUOoSw@OQ5Iqr~|CQd_L0Y4BN{xWB;xWwz;}F#9q3T&pEmTgMLWR1Ff7Pr3<VKIUdnuy3S# zWSD0}q%V*8-=F3QXZy>99Lyp1`3Cw2M1`|^|FxHUbW~t)B#(Q9uUB+nu#bC`Z%AZl zgnNuOPdJCNvNI>MM^uM&dZq3Xohn;vog=#VbS3{ zHZ+bX`*2<>in5iDl|YoTm4MZ=BYb=#e0|IUy`y--`PMl^ zIfO3{Se6mMVjJxhzQUP@IaIJ`oNvTm%EZI@|GSs{qmbM)I56JxkCAz-|LFBsAq)KR z&H?NJ%=ZZ34EQTft#H8r9wsLHaG^io{Q-^%E#R*Rgq{CLMS2B#Muv<0sh4*6%D?pz z{nuWr{()P}nVs1`JlZ!RE?oS-gB%qL;d1}~5Y^rPzJ~XDhWH2jx@-OewEVvT zy&-(<-$5(2IKw{}uGQe}Go`i)3RwgQI=J*Z&V-YyAVZ>c4@lAFlRy z*y{ftw#GlfHVD`J8*HtAg{}P$*gDRCXk2J?R9JLWxbFV|xAs49Z}>O34a4>Rj$8lV z<2Lv&xLKG`v^2vF{|4OXUx6F{12~nbg@pPaewqlg*B?B^Jdz@86qq{R$Rr|p=Hx;TuTs@v``H&^qwLAA)vP#!psk*7bTQT>? 
z24)rnTRmIf*sl@nO?cU@KPDb(_(W2O_)C*P0t~M*God5cpGcl*y>-lXG!_ZiY07(yIgi@ghlOi zqonIVaptw-dlrv`zxuVxMC8iJ3%tu6bk%A(bu|s+F7ogSBo^)wOueq}rL#&u-*ZQ&i?k$etW51U9Zi1T_tmO)?4{md-5ZK?g|Hl8qjKm#3fpsy*^JtHc^b*>!$5c9$BfG&qh}XO_0bta6xqF?zP-UGHpjL|w^uo*~CH zXqY}@S=V|<=h<#f`2?er_?kx)r#}!&8?_SUwA?1T^u;6B!_BJBKSO8T;aVTTg5Ybs zhJ}ptId(%_w)QNqwOjm0^UtXp-5q}27&=ovV?7aSb6tOG^vxaNV#>@eX{WuR0hEv8 zi9KPbF6>vdKI|{+!E?zaTQq0^X}r*Po_L=d7m~K|clLdw-(@u~HoJV@`1Jd|di5IB zeFK})b|u*l{wl9KwsG)HdYec7PqBN+ri z?G57!0$Wy%lr}nu{qP!jUq3E2oqk2__nET{W1|MAm9IqGSH}+pIp2$zJrMoi#Wo@D z!*8;m?OOhR*RERr+08mtKOg)2(n_2+yQRInW#V_-OO>3={r)?%em;NrD)pw&@C4YGnrz5czlvotp=jWv?+x2E7Lmt0?>DvOd3O3%b$s=v>N8)zU8c6X)bNb$J>7Qk#sLJX2P& z<27wC_a5iDqMz)4*0TMv=U6T?S;JA9LQ(tAv`pQHX}kU1nLY=3+9mj`ljyy3@ma0q zi-nK#LtG?`Z40mDH3YtEKi#Kbv(0qXn1St8$$Q^!VBO`%Mwc1%S|-zK_~ms?R38Tj zY-O-6@_G5`YTkBDeMs%0^0m?!S&7PTPl9&|8fHg*hUVuN&+pHfA-^XK(8u?j@7*^$ zD8R>e$yP?3S>R(xjT)aU?Vn|!TdczadAhg3uG7I<8sYovMSAnD2O6KfA9L|T@LgLQ z(R=mZodYYXu3w59X#eCDXS8eev8lMuR=>1c&jSp16=%q(UQ)W=cX!TAAt|AK`(2aZ zrGc%l<%c2^cBneOK6lMbuDCa=-O8!_N8^d>*Y{qG@{>zkvonKPPHu6RH?674=#;)p z|8D;-rT7@dZZ?Ng3E@W3(GtJLr6Y-~kJ&?kK99dAsOY=g-xMty*X!0>T0MO9T||rH z{p?T8Y$s12xY_L1J@hGWm#zPvlarJ0bWT>HtxqOIpOlVYG*o%{Vt{y-;`)7f`1WRE zM^I?!%tG|Ez4YeBE&i7-b&a+L?V8G&`r&iIW3n=xb*PJfJb~XtW|e4%$=5r(R*J?8 z7&X;(9D3N9#=&p0YL)1IftKdFJBLP_-sZ;(NWHwR&7U&1-&vh^SN4o&8t1J?7f-Eb z+jM$+oPg9W*XfO6QGqt~Us}&S`BY`nu4mEWs{d%`-hHLc#hFt#=BTaGoa<(9^k%H)XCEr7oJs>fHJ) zuzGfV1(@7{S0-9^51CZRyfPFV*%C&1rLcR#S37aX0QrF4U%JvP_S~V+qY91NG6dS5 zJzuI0&^s<}5u1K?=BS^}8NpN>{&^66Z@{tA<>coLQj%TdnYlkA)x@ zo!C^?e5m_7mrUg6ZI6QGBbT+U3`v>@xb-%nQp=7vt0g_eR9ZfAjY&1vJ?`D9b@C+z z!=fJMV_vq#XO?quni*`I^-A^JJ>f|E*?7Kp?)YafKTcoujZJB1FO&^!o^VLz)4yKU ze6rh@YwS5)z%DpLrNnukdPJCs>Lc@sh*YXRZ`qK|1W&4*{*8cdxy?eQ-8MUQ_jkIb zY2JNcl`ZNvVX9#_nK1uwx^l6@Eo-)rYep_Mt66~cXjSNJS!jmg!13m%Mb?5NHnT>l zzvr}nk1c)=0&`ai;s{?oxrE*aGG^D$cYo!A5ucSFJvW_TR^3JRt#S;`h~9X&WsF~2 zQy_242ZcyhliahOkECQH=uNZ)oDYQMy{rz;iW=*}~de(p)PAS1UI4E&21VtFS#b 
zwJv3!T(?30PSc|{3o$&dA6AQ)5+-4%zjLo?yTtv`Vqu%5*4N&0_3#~Il_%NeWtXf{ zL*<$VsCwxl+n`)@RqJr}55JVfZgqv&GsZ=<#KWVzxFp+xSB3IfO}J=ZH66V3;6Yhj zcJ~XeolVU)y1ggkh_;vzy<>hyyOUByLmCgS4B99uGbLymx+appO;-PN*%`%1jkdjV z)Gf2gLk~Rf_8Z4OGQL~HcVKL1hR;x*>5*HGD^@{nMydV<1Q$#%@+UET{;kn?C zwQp)7df%B6n`dR)*2}rcigoYmIF_VbXs(@8xc}r?nN1?RKi8HGCe$9OQb@^Jlkmn4 zuZh0V+QNH7aiOW1F%>i`5p+cot@zPa*>(89{aK~%t??QOX$xzvw@$6RzIEXG8zs_g zXzh8krZZjsQq6DaW;TK_GdE^Xyk7E!H`VH6e#powJ=Hh2~fNfuX_q=$u z>dzt)oa#SCGR1S08Dg6M??m)p{Dr)(5MeEm!{-_TB~QQ4R`|ejBwXF5Og7a~Anm?1 zZ4FCk?wf9dtPKD5n*#N;DUJu%XH$bebw9jpyj{BU@$tljr2}u}+;uxyDU8naz*y=9 zcU%9N$xB0DzWm&2WSkWJ&En9CPa!3ec1C$Qx9Oj8arqMdKHo(`RekG2 z|B#vvhhrQYucnt5$UE&`cXPYJFY9XocYB)6)pZ^TU0ml{)7SA`R+06_^q2J8BV%v* z5F<7+7pNaL9Yf#=z{Hr@i?LnXUAQ=1Wf6b~3p$8#;3oFO>P6N^q*tdldah&7 zg<_h{dL?hHoyl*%I&1Ik5*x;<|7pd@T{ZfA@5eFL=j{TAGoxf&0U(g!*qqAA7Cro|b$(VmWmoSN4j$?Yp%6y+;LVBo%vHlFn!a z_n1?4s#fm#dU$PHi23_+*5r>O{+=wMpv)<3zAkHT#rWucSs9B+j+;r6^pNi*-9p^% z-T@I~c_m9xmrH{_=6tQN_dld^AV$v2e|y%_ zIJ}&VDdbM3lWzV0M9}|LA9sfKX^PzI3x1&>y)QR*{95vnv%!0vQgaXA@tCXMSjBKt z(~r)mI(sMEYRB4gvSc=T;jqDGPC=oJPv$RwYyMafoUELlem`#^Dnk?1LwLw!A%Fw088a3A=qj&el*ykczNq z8uwtMG-sF|TllZFP|^Akr`9K>aUGlFqvbNmnrve(lk26;nxr)fLwW;WB*vznWItt) zIo>a%Xw=-X)EA&T_CwCf!eG%tTdP^*QuV1!c|)ZLpNvy`d_?rRW}@~ z>AUUTnY_16+1VP_ORFj<+fOW{?Ya4^@62Z%^`qDBaQtw!|6zOe}jve%3bln85|NjzV!+2^q;YNrZ)3XK|x)=^wnGH&K)6R1>c2< zm#;EE3Hp`3R<+M>db~7PewlxHK>6!EAz$9!a?OqNSY8;$UfX8mwsSbTX_Le1^5`3cq^myYzm)3H^EgfH!yC`S^4{ z?ue+>9S;=7O{Yjz(;t3zulcRi6+$k43;pwIOBJ}Xb(Fcv68^`Q@vpf?ras%`5FxiU z`O@Z#Qf7TvYaiBdJQi-4e5|I-HB=G2re4HOJL0Eh-lER&grx;bTibhFGfz)We3S2P zy%1usOm<|$`z)bwvs7NUvz$Y|Ha7eZ0;0P=?Q9CypID{h@WvxqV@@k^-i&KFy?p;!H+jPMWrq^=w^aYFu4l(rq;xc|QvP^(TyC|ybi|eNgI8T@&c1K? 
z7HWPV^Xuy&?%uqZZ*d>Ku>DzZ<=>4d1x%sw{g3VCKLqz*^z&rz);6Yo8nd1H{(16| z{2n)H9y4!i-A|lVl^e^*f{IMJwE8Q%RvH$>YAxQJ*3%lHHkWr=z0661Gvt>G$@&z^ zs+-eK1JkK`#yiGQU&x5tiJo^uUz^!h2Di~%WB2gUCw7@X5!tP5npC{?*6t4AaduSN%Md7<$#{{fm5G`saZY5}p3WJrz-2R+9XY zKc^!$clol>b#H5t25ZMhI)ZB!+st#q+<8|A?yPkzR`?X~;_SH0b` z2c>c!)X?GXJ)L^Y`TUU$m(84=pmEPhfx5$>FwU*Ya}rhhYZL#02F1T? z(~kewBK=qW;yrqQDelXp;D%a0_I@#8o264nh1L!q)AJRn%G*1AwBuIlu_3(oX4R*O zP6eCYKa##I#?4&pqweOU6uoSeP}(~e_5`inH~sX<1NOl4MZ3>$wrf-5^I5Sq^WXse zLS-Q%(mXx*L-j_kv%)XtiPrh)gUo-WJfAmd3Nl_HLT5d+R-yT@+x2tFEv!GfO)3-L zd@Fr>tMsKb_n5$QZ^xlq<>QW*&!$Tf?pak+uG%@5TqZB+J-Rw$$dR1dCUa6IQ+%m- zgT?>1wL-}|LO2e!H*R>M^Y6s0g|m8+)?^55c$Yto>0Vdl7lVd%Ng>9ER=I2r5Dq@) z60>*r|LXqYA8+qJh~LrXqWx;wm8riP1O48qyQHIi*iG^Wk(K;9}=1MVyT0CpY-&n{n>2HjsYZv*U%(yUc^m zxhJ1qO746q#>C-Lq-yxgWY*3)8Ar=0?#3BnqWj*+%yQ>m(cuu2yzslnI;9H*T!K|y zQkP@?KX3I|=lV?@H8J?loZ&)~{AGxlzZOOgjr?U}AOGgT)3+Y&O(M+VC%%WA>#q1= z^CW62+deg7@|Wvm`G3=)wG~zK7br-P^Ov_bv!TH8%~$tbdwy~8@l`ttSLX)01Z>dC z+y*&2MJBb$sFaEqgb=F0cXZWvu{OT&W+!qr@G>S2G5el^ofnS_rgQYCInyPoL!YAbKsm$zs+^2e-#gygBicy zVUUa3#>@KN+1}H09pm+!`^(SGu-nX#qoRDagRtVy?>Fqxxr}soP$uWD8*aQr~Cr)ac)tSqrE2B&}NUe)GZn zX*TzIBflJJ?*rwpESJp=lUK~?-0|zfkMDBaKmNk<*ODt&-PX*x<9Vw`v-^jx^3Pv| zw&3iwx+OJiue?l1eU*8t33Ok?#xR^^k@Dn+qvke7L(tkMAy9$N1mUv zxE1k#YB%G9b|l@hD-dc^cwij24HV?fPbtJVdv-UF7g71XZ2j6m7T-w;%N-TBOQXfp2( zDJv-tQ0=u1SbIeSYp*GaCr^CW4)nX&`Q&58p4l=-Ip$9@ZT)uoeEa3Q1&j`NG%qZh z%qJ+gt{{KjExqh4VD;r~cwPQsZ@Xdr5~ff&o`TCVwI@6t{+}PLaC+MGp5A-Am6DZb z9k!Iotg_KOv!m+&iQn&SBhNp1*BWspJ@>$m0E^r!=Xh>9eM*cHoEma;jTxuV)X6bi z@l$#$3Z?Fc^4>`YuFCp7G47pJ#czw-j@usn<$5|*Yi~pP5#e={b(;L#6IQsr`X2V> z_vdBe2j6`{YX|Vf+Xm63(gFn}a;Abb3b~tLl@=fCB15%VKeRu698CUT8?~gPDt*#eQZ1htVg#`#tZkjU`I0#ocV8dKXx0y#>`;hY+<^ zweT~Cg8sl%_Qk7$SLe&DvHz{LXjS@2CAsGd=N}E(zw@~SgAkX!QZTUAnrZH~cv=Fq z)=J^5e|k2+e{F{dFVnT(XM>K-=IoYZyAaxw+hA9Wlg@P8+kCgzBb5Ma@*lE zOO?H&ewbd*oX#Dkr}SW%+oAM=Id`<4d_w>=5^y75$rG$TvW&N*zG zqrSJ+6*Y6M=tv9ToLc`#XO{El6-^F7$qUa%tW&s9bjq-*D>`tYeXZiO)f@lvf^row z8$m&?K4&f1U#xF{ 
zYCp0G#Ar7})qbutJzO7HAb>FT$#)dnp$m{8OMJ32OZ4IkGLy4&Qn`v!L92OE<4Yd6lt+ylp|H5ulX}p!G)4#U({L3UJBTLcX@3LVmQ^7UNKp0xg&T zVeHGx%2JDpfeVgIxKJai)%LL@Xwe4f6B=$DalDi)xPau=uQow zHWx($E}8Tbdgf8k?J$8yFav8d(~d85>xb7?~Oy z7@C@z7zKDUg1iI==tp$IBpN^whsE)b13S@8LEo~9FvScQc$j_wn*vPXsBS^uf{HL@ z4Q^8~e1yKG6k&=IaKRo9w-}?l1$~Pd!juR(oTiwdn}WV&3}MPS+@_eKn}WWD1Yrsb zuqeRcBQtbU(6@vjOsT?ciaEL|=u5;Arfg8h=@tugQ_z=uBTP{PCVU(|vP3rpeF-SS zlo%bHrWgXtWz=MczN8aj%w?R$fXY{hIk?wHAO zh&Bqk(dZLy2%|M^2pf%O`VHM8^l2-EMLxC^S%lkf=rbV*D|3MRME4^4U@F3*8W)Nz!tFQoK|F+&pj{itj;y8mHTF)*O_S70&?j6DI^9f8z)f%pif1(jAr;x;gz z#cCKzUjo#VL9H>7OkhAYjwcYC+fa?eX!oJ_ZD7VXFnSX<4p9@sv@tNCH{M|~4UD^k zu(=P~lt=d_dQ%=@%onW2z#I1H2BA0V5e69tV{;GoCVqf7E2vWe3~@z<-P{Ze3xRqW F7ywhtCB^^% literal 0 HcmV?d00001 From a644e54fe9b916420b66067fa66673c5c66f103f Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 23 Mar 2026 10:17:41 +1300 Subject: [PATCH 5/5] [ML] Fix clang-format in CModelGraphValidatorTest Made-with: Cursor --- .../unittest/CModelGraphValidatorTest.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc index 351f59276..5180fb403 100644 --- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc +++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc @@ -451,10 +451,10 @@ BOOST_AUTO_TEST_CASE(testPrepackedE5ModelWithNorm) { auto module = ::torch::jit::load("testfiles/e5_with_norm.pt"); auto result = CModelGraphValidator::validate(module); - BOOST_REQUIRE_MESSAGE(result.s_IsValid, - "e5_with_norm.pt should pass validation but failed. " - "Forbidden: " << result.s_ForbiddenOps.size() - << ", Unrecognised: " << result.s_UnrecognisedOps.size()); + BOOST_REQUIRE_MESSAGE(result.s_IsValid, "e5_with_norm.pt should pass validation but failed. 
" + "Forbidden: " + << result.s_ForbiddenOps.size() << ", Unrecognised: " + << result.s_UnrecognisedOps.size()); BOOST_REQUIRE(result.s_ForbiddenOps.empty()); BOOST_REQUIRE(result.s_UnrecognisedOps.empty()); }