diff --git a/src/infer_request.cc b/src/infer_request.cc index e5733662..ce6004db 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1,4 +1,4 @@ -// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -418,7 +418,7 @@ InferRequest::Exec(const bool is_decoupled) py::gil_scoped_release release; // BLS should not be used in "initialize" or "finalize" function. - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); if (!stub->IsInitialized() || stub->IsFinalizing()) { throw PythonBackendException( "BLS is only supported during the 'execute' function."); diff --git a/src/metric.cc b/src/metric.cc index 4c055910..8ee6fa17 100644 --- a/src/metric.cc +++ b/src/metric.cc @@ -1,4 +1,4 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -147,7 +147,7 @@ void Metric::SendCreateMetricRequest() { // Send the request to create the Metric to the parent process - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); CustomMetricsMessage* custom_metrics_msg = nullptr; AllocatedSharedMemory custom_metrics_shm; @@ -170,7 +170,7 @@ Metric::SendIncrementRequest(const double& value) py::gil_scoped_release release; try { CheckIfCleared(); - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); operation_value_ = value; SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory custom_metrics_shm; @@ -189,7 +189,7 @@ Metric::SendSetValueRequest(const double& value) { try { CheckIfCleared(); - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); operation_value_ = value; SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory custom_metrics_shm; @@ -208,7 +208,7 @@ Metric::SendObserveRequest(const double& value) py::gil_scoped_release release; try { CheckIfCleared(); - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); operation_value_ = value; SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory custom_metrics_shm; @@ -228,7 +228,7 @@ Metric::SendGetValueRequest() AllocatedSharedMemory custom_metrics_shm; try { CheckIfCleared(); - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); stub->SendMessage( custom_metrics_shm, PYTHONSTUB_MetricRequestValue, shm_handle_); @@ -251,7 +251,7 @@ Metric::Clear() // scope/being deleted. 
if (!is_cleared_) { is_cleared_ = true; - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory custom_metrics_shm; try { diff --git a/src/metric_family.cc b/src/metric_family.cc index 222a0e23..a7ca5e1a 100644 --- a/src/metric_family.cc +++ b/src/metric_family.cc @@ -1,4 +1,4 @@ -// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -55,7 +55,7 @@ MetricFamily::~MetricFamily() } // Send the request to delete the MetricFamily to the parent process - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory custom_metrics_shm; try { @@ -147,7 +147,7 @@ MetricFamily::CreateMetricFamily( void MetricFamily::SendCreateMetricFamilyRequest() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); CustomMetricsMessage* custom_metrics_msg = nullptr; AllocatedSharedMemory custom_metrics_shm; diff --git a/src/model_loader.cc b/src/model_loader.cc index 0be45fa5..f85213e0 100644 --- a/src/model_loader.cc +++ b/src/model_loader.cc @@ -1,4 +1,4 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -101,7 +101,7 @@ ModelLoader::ModelLoader( void ModelLoader::SendLoadModelRequest() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory model_loader_msg_shm; @@ -118,7 +118,7 @@ ModelLoader::SendLoadModelRequest() void ModelLoader::SendUnloadModelRequest() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); AllocatedSharedMemory model_loader_msg_shm; try { @@ -134,7 +134,7 @@ ModelLoader::SendUnloadModelRequest() bool ModelLoader::SendModelReadinessRequest() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); SaveToSharedMemory(stub->ShmPool()); ModelLoaderMessage* model_loader_msg = nullptr; AllocatedSharedMemory model_loader_msg_shm; diff --git a/src/pb_bls_cancel.cc b/src/pb_bls_cancel.cc index 4341c037..cff878c7 100644 --- a/src/pb_bls_cancel.cc +++ b/src/pb_bls_cancel.cc @@ -1,4 +1,4 @@ -// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -67,7 +67,7 @@ PbBLSCancel::Cancel() return; } if (!updating_) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); if (!stub->StubToParentServiceActive()) { LOG_ERROR << "Cannot communicate with parent service"; return; diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc index da9daf98..193b6735 100644 --- a/src/pb_cancel.cc +++ b/src/pb_cancel.cc @@ -1,4 +1,4 @@ -// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -68,7 +68,7 @@ PbCancel::IsCancelled() return is_cancelled_; } if (!updating_) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); if (!stub->StubToParentServiceActive()) { LOG_ERROR << "Cannot communicate with parent service"; return false; diff --git a/src/pb_response_iterator.cc b/src/pb_response_iterator.cc index 536d4232..2f69c6e2 100644 --- a/src/pb_response_iterator.cc +++ b/src/pb_response_iterator.cc @@ -1,4 +1,4 @@ -// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -133,7 +133,7 @@ ResponseIterator::Id() void ResponseIterator::Clear() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); stub->EnqueueCleanupId(id_, PYTHONSTUB_BLSDecoupledInferPayloadCleanup); { std::lock_guard lock{mu_}; diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 2fe9a73f..173a9c3e 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -130,7 +130,7 @@ PyParametersToJSON(const py::dict& parameters) void AsyncEventFutureDoneCallback(const py::object& py_future) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); stub->BackgroundFutureDone(py_future); } @@ -514,7 +514,7 @@ Stub::AutoCompleteModelConfig( python_backend_utils.def( "get_model_dir", []() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); return stub->GetModelDir(); }, py::return_value_policy::reference); @@ -568,7 +568,7 @@ 
Stub::Initialize(bi::managed_external_buffer::handle_t map_handle) python_backend_utils.def( "get_model_dir", []() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); return stub->GetModelDir(); }, py::return_value_policy::reference); @@ -1073,16 +1073,22 @@ Stub::~Stub() memory_manager_message_queue_.reset(); } -std::unique_ptr Stub::stub_instance_; +static std::shared_ptr stub_instance{nullptr}; -std::unique_ptr& +std::shared_ptr Stub::GetOrCreateInstance() { - if (Stub::stub_instance_.get() == nullptr) { - Stub::stub_instance_ = std::make_unique(); + if (!stub_instance) { + stub_instance.reset(new triton::backend::python::Stub()); } - return Stub::stub_instance_; + return stub_instance; +} + +void +Stub::DestroyInstance() +{ + stub_instance.reset(); } void @@ -1822,7 +1828,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) "exec", [](std::shared_ptr& infer_request, const bool decoupled) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); std::shared_ptr response = infer_request->Exec(decoupled); py::object response_object; @@ -1840,7 +1846,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) "async_exec", [](std::shared_ptr& infer_request, const bool decoupled) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); py::object loop = py::module_::import("asyncio").attr("get_running_loop")(); - py::cpp_function callback = [&stub, infer_request, decoupled]() { + py::cpp_function callback = [stub, infer_request, decoupled]() { @@ -2125,7 +2131,7 @@ main(int argc, char** argv) std::string name = argv[8]; std::string runtime_modeldir = argv[9]; - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); try { stub->Instantiate( shm_growth_size, shm_default_size, shm_region_name, model_path, @@ -2135,7 +2141,7 @@ main(int argc, char** argv) catch (const PythonBackendException& pb_exception) { LOG_INFO << "Failed to preinitialize Python stub: 
" << pb_exception.what(); logger.reset(); - stub.reset(); + Stub::DestroyInstance(); exit(1); } @@ -2148,7 +2154,7 @@ main(int argc, char** argv) #endif std::atomic background_thread_running = {true}; std::thread background_thread = - std::thread([&parent_pid, &background_thread_running, &stub, &logger] { + std::thread([stub, &parent_pid, &background_thread_running, &logger] { // Send a dummy message after the stub process is launched to notify the // parent process that the health thread has started. std::unique_ptr ipc_message = IPCMessage::Create( @@ -2180,7 +2186,7 @@ main(int argc, char** argv) // Destroy stub and exit. logger.reset(); - stub.reset(); + Stub::DestroyInstance(); exit(1); } } @@ -2213,7 +2219,8 @@ main(int argc, char** argv) // this process will no longer hold the GIL lock and destruction of the stub // will result in segfault. logger.reset(); stub.reset(); + Stub::DestroyInstance(); return 0; } diff --git a/src/pb_stub.h b/src/pb_stub.h index bb1fc91a..e39014f8 100644 --- a/src/pb_stub.h +++ b/src/pb_stub.h @@ -96,7 +96,9 @@ struct UtilsMessagePayload { class Stub { public: Stub() : stub_to_parent_thread_(false), parent_to_stub_thread_(false){}; - static std::unique_ptr& GetOrCreateInstance(); + static std::shared_ptr GetOrCreateInstance(); + + static void DestroyInstance(); /// Instantiate a new Python backend Stub. void Instantiate( @@ -296,7 +298,6 @@ class Stub { std::unique_ptr> memory_manager_message_queue_; bool initialized_; bool finalizing_; - static std::unique_ptr stub_instance_; std::vector> gpu_tensors_; std::queue> stub_to_parent_buffer_; std::thread stub_to_parent_queue_monitor_; diff --git a/src/pb_stub_log.cc b/src/pb_stub_log.cc index d0b1ff97..4e4f669e 100644 --- a/src/pb_stub_log.cc +++ b/src/pb_stub_log.cc @@ -1,4 +1,4 @@ -// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -51,7 +51,7 @@ Logger::GetOrCreateInstance() void Logger::Log(const std::string& message, LogLevel level) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); py::object frame = py::module_::import("inspect").attr("currentframe"); py::object caller_frame = frame(); py::object info = py::module_::import("inspect").attr("getframeinfo"); @@ -110,7 +110,7 @@ Logger::Log( #endif } else { // Ensure we do not create a stub instance before it has initialized - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); std::unique_ptr log_msg(new PbLog(filename, lineno, message, level)); stub->EnqueueLogRequest(log_msg); } diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc index 26e77586..02c37845 100644 --- a/src/pb_tensor.cc +++ b/src/pb_tensor.cc @@ -1,4 +1,4 @@ -// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -419,7 +419,7 @@ PbTensor::FromDLPack(const std::string& name, const py::object& tensor) #ifdef TRITON_ENABLE_GPU int current_device; cudaError_t err = cudaGetDevice(&current_device); - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); if (err != cudaSuccess) { throw PythonBackendException("Failed to get current CUDA device id."); } diff --git a/src/response_sender.cc b/src/response_sender.cc index ef3b09dd..9fe46cc4 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -1,4 +1,4 @@ -// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -129,7 +129,7 @@ ResponseSender::Send( infer_response->PruneOutputTensors(requested_output_names_); } - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); AllocatedSharedMemory response_send_message = shm_pool_->Construct( @@ -279,7 +279,7 @@ ResponseSender::DeleteResponseFactory() { bool already_deleted = response_factory_deleted_.exchange(true); if (!already_deleted) { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); + auto stub = Stub::GetOrCreateInstance(); stub->EnqueueCleanupId( reinterpret_cast(response_factory_address_), PYTHONSTUB_DecoupledResponseFactoryCleanup);