Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/infer_request.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -418,7 +418,7 @@ InferRequest::Exec(const bool is_decoupled)
py::gil_scoped_release release;

// BLS should not be used in "initialize" or "finalize" function.
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
if (!stub->IsInitialized() || stub->IsFinalizing()) {
throw PythonBackendException(
"BLS is only supported during the 'execute' function.");
Expand Down
14 changes: 7 additions & 7 deletions src/metric.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -147,7 +147,7 @@ void
Metric::SendCreateMetricRequest()
{
// Send the request to create the Metric to the parent process
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
CustomMetricsMessage* custom_metrics_msg = nullptr;
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
Expand All @@ -170,7 +170,7 @@ Metric::SendIncrementRequest(const double& value)
py::gil_scoped_release release;
try {
CheckIfCleared();
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
operation_value_ = value;
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
Expand All @@ -189,7 +189,7 @@ Metric::SendSetValueRequest(const double& value)
{
try {
CheckIfCleared();
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
operation_value_ = value;
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
Expand All @@ -208,7 +208,7 @@ Metric::SendObserveRequest(const double& value)
py::gil_scoped_release release;
try {
CheckIfCleared();
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
operation_value_ = value;
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
Expand All @@ -228,7 +228,7 @@ Metric::SendGetValueRequest()
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
try {
CheckIfCleared();
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
stub->SendMessage<CustomMetricsMessage>(
custom_metrics_shm, PYTHONSTUB_MetricRequestValue, shm_handle_);
Expand All @@ -251,7 +251,7 @@ Metric::Clear()
// scope/being deleted.
if (!is_cleared_) {
is_cleared_ = true;
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
try {
Expand Down
6 changes: 3 additions & 3 deletions src/metric_family.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -55,7 +55,7 @@ MetricFamily::~MetricFamily()
}

// Send the request to delete the MetricFamily to the parent process
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
try {
Expand Down Expand Up @@ -147,7 +147,7 @@ MetricFamily::CreateMetricFamily(
void
MetricFamily::SendCreateMetricFamilyRequest()
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
CustomMetricsMessage* custom_metrics_msg = nullptr;
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
Expand Down
8 changes: 4 additions & 4 deletions src/model_loader.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -101,7 +101,7 @@ ModelLoader::ModelLoader(
void
ModelLoader::SendLoadModelRequest()
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;

Expand All @@ -118,7 +118,7 @@ ModelLoader::SendLoadModelRequest()
void
ModelLoader::SendUnloadModelRequest()
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;
try {
Expand All @@ -134,7 +134,7 @@ ModelLoader::SendUnloadModelRequest()
bool
ModelLoader::SendModelReadinessRequest()
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
SaveToSharedMemory(stub->ShmPool());
ModelLoaderMessage* model_loader_msg = nullptr;
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;
Expand Down
4 changes: 2 additions & 2 deletions src/pb_bls_cancel.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -67,7 +67,7 @@ PbBLSCancel::Cancel()
return;
}
if (!updating_) {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
if (!stub->StubToParentServiceActive()) {
LOG_ERROR << "Cannot communicate with parent service";
return;
Expand Down
4 changes: 2 additions & 2 deletions src/pb_cancel.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -68,7 +68,7 @@ PbCancel::IsCancelled()
return is_cancelled_;
}
if (!updating_) {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
if (!stub->StubToParentServiceActive()) {
LOG_ERROR << "Cannot communicate with parent service";
return false;
Expand Down
4 changes: 2 additions & 2 deletions src/pb_response_iterator.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -133,7 +133,7 @@ ResponseIterator::Id()
void
ResponseIterator::Clear()
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
stub->EnqueueCleanupId(id_, PYTHONSTUB_BLSDecoupledInferPayloadCleanup);
{
std::lock_guard<std::mutex> lock{mu_};
Expand Down
36 changes: 21 additions & 15 deletions src/pb_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ PyParametersToJSON(const py::dict& parameters)
void
AsyncEventFutureDoneCallback(const py::object& py_future)
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
stub->BackgroundFutureDone(py_future);
}

Expand Down Expand Up @@ -514,7 +514,7 @@ Stub::AutoCompleteModelConfig(
python_backend_utils.def(
"get_model_dir",
[]() {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
return stub->GetModelDir();
},
py::return_value_policy::reference);
Expand Down Expand Up @@ -568,7 +568,7 @@ Stub::Initialize(bi::managed_external_buffer::handle_t map_handle)
python_backend_utils.def(
"get_model_dir",
[]() {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
return stub->GetModelDir();
},
py::return_value_policy::reference);
Expand Down Expand Up @@ -1073,16 +1073,22 @@ Stub::~Stub()
memory_manager_message_queue_.reset();
}

std::unique_ptr<Stub> Stub::stub_instance_;
static std::shared_ptr<triton::backend::python::Stub> stub_instance{nullptr};

std::unique_ptr<Stub>&
std::shared_ptr<triton::backend::python::Stub>
Stub::GetOrCreateInstance()
{
if (Stub::stub_instance_.get() == nullptr) {
Stub::stub_instance_ = std::make_unique<Stub>();
if (!stub_instance) {
stub_instance.reset(new triton::backend::python::Stub());
}

return Stub::stub_instance_;
return stub_instance;
}

// Drops the singleton's own reference to the Stub.
//
// After this call the file-level `stub_instance` pointer is empty, so the
// next GetOrCreateInstance() call constructs a new Stub.
//
// NOTE(review): GetOrCreateInstance() hands out the shared_ptr by value, so
// any copy a caller still holds (a local variable, a by-value lambda capture)
// keeps the Stub object alive after this returns; the Stub destructor only
// runs once the last such copy is released. Confirm that callers relying on
// the Stub being torn down at this point do not hold their own copy.
void
Stub::DestroyInstance()
{
  stub_instance = nullptr;
}

void
Expand Down Expand Up @@ -1822,7 +1828,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
"exec",
[](std::shared_ptr<InferRequest>& infer_request,
const bool decoupled) {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
std::shared_ptr<InferResponse> response =
infer_request->Exec(decoupled);
py::object response_object;
Expand All @@ -1840,7 +1846,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
"async_exec",
[](std::shared_ptr<InferRequest>& infer_request,
const bool decoupled) {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
py::object loop =
py::module_::import("asyncio").attr("get_running_loop")();
py::cpp_function callback = [&stub, infer_request, decoupled]() {
Expand Down Expand Up @@ -2125,7 +2131,7 @@ main(int argc, char** argv)
std::string name = argv[8];
std::string runtime_modeldir = argv[9];

std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
try {
stub->Instantiate(
shm_growth_size, shm_default_size, shm_region_name, model_path,
Expand All @@ -2135,7 +2141,7 @@ main(int argc, char** argv)
catch (const PythonBackendException& pb_exception) {
LOG_INFO << "Failed to preinitialize Python stub: " << pb_exception.what();
logger.reset();
stub.reset();
Stub::DestroyInstance();
exit(1);
}

Expand All @@ -2148,7 +2154,7 @@ main(int argc, char** argv)
#endif
std::atomic<bool> background_thread_running = {true};
std::thread background_thread =
std::thread([&parent_pid, &background_thread_running, &stub, &logger] {
std::thread([stub, &parent_pid, &background_thread_running, &logger] {
// Send a dummy message after the stub process is launched to notify the
// parent process that the health thread has started.
std::unique_ptr<IPCMessage> ipc_message = IPCMessage::Create(
Expand Down Expand Up @@ -2180,7 +2186,7 @@ main(int argc, char** argv)

// Destroy stub and exit.
logger.reset();
stub.reset();
Stub::DestroyInstance();
exit(1);
}
}
Expand Down Expand Up @@ -2213,7 +2219,7 @@ main(int argc, char** argv)
// this process will no longer hold the GIL lock and destruction of the stub
// will result in segfault.
logger.reset();
stub.reset();
Stub::DestroyInstance();

return 0;
}
Expand Down
5 changes: 3 additions & 2 deletions src/pb_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ struct UtilsMessagePayload {
class Stub {
public:
Stub() : stub_to_parent_thread_(false), parent_to_stub_thread_(false){};
static std::unique_ptr<Stub>& GetOrCreateInstance();
static std::shared_ptr<Stub> GetOrCreateInstance();

static void DestroyInstance();

/// Instantiate a new Python backend Stub.
void Instantiate(
Expand Down Expand Up @@ -296,7 +298,6 @@ class Stub {
std::unique_ptr<MessageQueue<uint64_t>> memory_manager_message_queue_;
bool initialized_;
bool finalizing_;
static std::unique_ptr<Stub> stub_instance_;
std::vector<std::shared_ptr<PbTensor>> gpu_tensors_;
std::queue<std::unique_ptr<UtilsMessagePayload>> stub_to_parent_buffer_;
std::thread stub_to_parent_queue_monitor_;
Expand Down
6 changes: 3 additions & 3 deletions src/pb_stub_log.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -51,7 +51,7 @@ Logger::GetOrCreateInstance()
void
Logger::Log(const std::string& message, LogLevel level)
{
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
py::object frame = py::module_::import("inspect").attr("currentframe");
py::object caller_frame = frame();
py::object info = py::module_::import("inspect").attr("getframeinfo");
Expand Down Expand Up @@ -110,7 +110,7 @@ Logger::Log(
#endif
} else {
// Ensure we do not create a stub instance before it has initialized
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
std::unique_ptr<PbLog> log_msg(new PbLog(filename, lineno, message, level));
stub->EnqueueLogRequest(log_msg);
}
Expand Down
4 changes: 2 additions & 2 deletions src/pb_tensor.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -419,7 +419,7 @@ PbTensor::FromDLPack(const std::string& name, const py::object& tensor)
#ifdef TRITON_ENABLE_GPU
int current_device;
cudaError_t err = cudaGetDevice(&current_device);
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
if (err != cudaSuccess) {
throw PythonBackendException("Failed to get current CUDA device id.");
}
Expand Down
6 changes: 3 additions & 3 deletions src/response_sender.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -129,7 +129,7 @@ ResponseSender::Send(
infer_response->PruneOutputTensors(requested_output_names_);
}

std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();

AllocatedSharedMemory<ResponseSendMessage> response_send_message =
shm_pool_->Construct<ResponseSendMessage>(
Expand Down Expand Up @@ -279,7 +279,7 @@ ResponseSender::DeleteResponseFactory()
{
bool already_deleted = response_factory_deleted_.exchange(true);
if (!already_deleted) {
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
auto stub = Stub::GetOrCreateInstance();
stub->EnqueueCleanupId(
reinterpret_cast<void*>(response_factory_address_),
PYTHONSTUB_DecoupledResponseFactoryCleanup);
Expand Down
Loading