Skip to content

Commit 47f9e03

Browse files
authored
fix: Use Correct ptr Type to Avoid UAF (#433)
* fix: Use Correct ptr Type This change switches the managed pointer type used to manage the lifecycle of the stub instance from unique to shared. This avoids dereferencing the often-used unique pointer reference after the unique pointer has been freed / deallocated. * add destroy instance function Adding a destroy instance function to stub to forcibly invalidate the shared ptr. * remove the old static member (not really a member, because it is a static field). * update copyrights
1 parent 65529b0 commit 47f9e03

12 files changed

Lines changed: 54 additions & 47 deletions

src/infer_request.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -418,7 +418,7 @@ InferRequest::Exec(const bool is_decoupled)
418418
py::gil_scoped_release release;
419419

420420
// BLS should not be used in "initialize" or "finalize" function.
421-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
421+
auto stub = Stub::GetOrCreateInstance();
422422
if (!stub->IsInitialized() || stub->IsFinalizing()) {
423423
throw PythonBackendException(
424424
"BLS is only supported during the 'execute' function.");

src/metric.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -147,7 +147,7 @@ void
147147
Metric::SendCreateMetricRequest()
148148
{
149149
// Send the request to create the Metric to the parent process
150-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
150+
auto stub = Stub::GetOrCreateInstance();
151151
SaveToSharedMemory(stub->ShmPool());
152152
CustomMetricsMessage* custom_metrics_msg = nullptr;
153153
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
@@ -170,7 +170,7 @@ Metric::SendIncrementRequest(const double& value)
170170
py::gil_scoped_release release;
171171
try {
172172
CheckIfCleared();
173-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
173+
auto stub = Stub::GetOrCreateInstance();
174174
operation_value_ = value;
175175
SaveToSharedMemory(stub->ShmPool());
176176
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
@@ -189,7 +189,7 @@ Metric::SendSetValueRequest(const double& value)
189189
{
190190
try {
191191
CheckIfCleared();
192-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
192+
auto stub = Stub::GetOrCreateInstance();
193193
operation_value_ = value;
194194
SaveToSharedMemory(stub->ShmPool());
195195
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
@@ -208,7 +208,7 @@ Metric::SendObserveRequest(const double& value)
208208
py::gil_scoped_release release;
209209
try {
210210
CheckIfCleared();
211-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
211+
auto stub = Stub::GetOrCreateInstance();
212212
operation_value_ = value;
213213
SaveToSharedMemory(stub->ShmPool());
214214
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
@@ -228,7 +228,7 @@ Metric::SendGetValueRequest()
228228
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
229229
try {
230230
CheckIfCleared();
231-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
231+
auto stub = Stub::GetOrCreateInstance();
232232
SaveToSharedMemory(stub->ShmPool());
233233
stub->SendMessage<CustomMetricsMessage>(
234234
custom_metrics_shm, PYTHONSTUB_MetricRequestValue, shm_handle_);
@@ -251,7 +251,7 @@ Metric::Clear()
251251
// scope/being deleted.
252252
if (!is_cleared_) {
253253
is_cleared_ = true;
254-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
254+
auto stub = Stub::GetOrCreateInstance();
255255
SaveToSharedMemory(stub->ShmPool());
256256
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
257257
try {

src/metric_family.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -55,7 +55,7 @@ MetricFamily::~MetricFamily()
5555
}
5656

5757
// Send the request to delete the MetricFamily to the parent process
58-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
58+
auto stub = Stub::GetOrCreateInstance();
5959
SaveToSharedMemory(stub->ShmPool());
6060
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
6161
try {
@@ -147,7 +147,7 @@ MetricFamily::CreateMetricFamily(
147147
void
148148
MetricFamily::SendCreateMetricFamilyRequest()
149149
{
150-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
150+
auto stub = Stub::GetOrCreateInstance();
151151
SaveToSharedMemory(stub->ShmPool());
152152
CustomMetricsMessage* custom_metrics_msg = nullptr;
153153
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;

src/model_loader.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -101,7 +101,7 @@ ModelLoader::ModelLoader(
101101
void
102102
ModelLoader::SendLoadModelRequest()
103103
{
104-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
104+
auto stub = Stub::GetOrCreateInstance();
105105
SaveToSharedMemory(stub->ShmPool());
106106
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;
107107

@@ -118,7 +118,7 @@ ModelLoader::SendLoadModelRequest()
118118
void
119119
ModelLoader::SendUnloadModelRequest()
120120
{
121-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
121+
auto stub = Stub::GetOrCreateInstance();
122122
SaveToSharedMemory(stub->ShmPool());
123123
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;
124124
try {
@@ -134,7 +134,7 @@ ModelLoader::SendUnloadModelRequest()
134134
bool
135135
ModelLoader::SendModelReadinessRequest()
136136
{
137-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
137+
auto stub = Stub::GetOrCreateInstance();
138138
SaveToSharedMemory(stub->ShmPool());
139139
ModelLoaderMessage* model_loader_msg = nullptr;
140140
AllocatedSharedMemory<ModelLoaderMessage> model_loader_msg_shm;

src/pb_bls_cancel.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -67,7 +67,7 @@ PbBLSCancel::Cancel()
6767
return;
6868
}
6969
if (!updating_) {
70-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
70+
auto stub = Stub::GetOrCreateInstance();
7171
if (!stub->StubToParentServiceActive()) {
7272
LOG_ERROR << "Cannot communicate with parent service";
7373
return;

src/pb_cancel.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -68,7 +68,7 @@ PbCancel::IsCancelled()
6868
return is_cancelled_;
6969
}
7070
if (!updating_) {
71-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
71+
auto stub = Stub::GetOrCreateInstance();
7272
if (!stub->StubToParentServiceActive()) {
7373
LOG_ERROR << "Cannot communicate with parent service";
7474
return false;

src/pb_response_iterator.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -133,7 +133,7 @@ ResponseIterator::Id()
133133
void
134134
ResponseIterator::Clear()
135135
{
136-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
136+
auto stub = Stub::GetOrCreateInstance();
137137
stub->EnqueueCleanupId(id_, PYTHONSTUB_BLSDecoupledInferPayloadCleanup);
138138
{
139139
std::lock_guard<std::mutex> lock{mu_};

src/pb_stub.cc

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ PyParametersToJSON(const py::dict& parameters)
130130
void
131131
AsyncEventFutureDoneCallback(const py::object& py_future)
132132
{
133-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
133+
auto stub = Stub::GetOrCreateInstance();
134134
stub->BackgroundFutureDone(py_future);
135135
}
136136

@@ -514,7 +514,7 @@ Stub::AutoCompleteModelConfig(
514514
python_backend_utils.def(
515515
"get_model_dir",
516516
[]() {
517-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
517+
auto stub = Stub::GetOrCreateInstance();
518518
return stub->GetModelDir();
519519
},
520520
py::return_value_policy::reference);
@@ -568,7 +568,7 @@ Stub::Initialize(bi::managed_external_buffer::handle_t map_handle)
568568
python_backend_utils.def(
569569
"get_model_dir",
570570
[]() {
571-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
571+
auto stub = Stub::GetOrCreateInstance();
572572
return stub->GetModelDir();
573573
},
574574
py::return_value_policy::reference);
@@ -1073,16 +1073,22 @@ Stub::~Stub()
10731073
memory_manager_message_queue_.reset();
10741074
}
10751075

1076-
std::unique_ptr<Stub> Stub::stub_instance_;
1076+
static std::shared_ptr<triton::backend::python::Stub> stub_instance{nullptr};
10771077

1078-
std::unique_ptr<Stub>&
1078+
std::shared_ptr<triton::backend::python::Stub>
10791079
Stub::GetOrCreateInstance()
10801080
{
1081-
if (Stub::stub_instance_.get() == nullptr) {
1082-
Stub::stub_instance_ = std::make_unique<Stub>();
1081+
if (!stub_instance) {
1082+
stub_instance.reset(new triton::backend::python::Stub());
10831083
}
10841084

1085-
return Stub::stub_instance_;
1085+
return stub_instance;
1086+
}
1087+
1088+
void
1089+
Stub::DestroyInstance()
1090+
{
1091+
stub_instance.reset();
10861092
}
10871093

10881094
void
@@ -1822,7 +1828,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
18221828
"exec",
18231829
[](std::shared_ptr<InferRequest>& infer_request,
18241830
const bool decoupled) {
1825-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
1831+
auto stub = Stub::GetOrCreateInstance();
18261832
std::shared_ptr<InferResponse> response =
18271833
infer_request->Exec(decoupled);
18281834
py::object response_object;
@@ -1840,7 +1846,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
18401846
"async_exec",
18411847
[](std::shared_ptr<InferRequest>& infer_request,
18421848
const bool decoupled) {
1843-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
1849+
auto stub = Stub::GetOrCreateInstance();
18441850
py::object loop =
18451851
py::module_::import("asyncio").attr("get_running_loop")();
18461852
py::cpp_function callback = [&stub, infer_request, decoupled]() {
@@ -2125,7 +2131,7 @@ main(int argc, char** argv)
21252131
std::string name = argv[8];
21262132
std::string runtime_modeldir = argv[9];
21272133

2128-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
2134+
auto stub = Stub::GetOrCreateInstance();
21292135
try {
21302136
stub->Instantiate(
21312137
shm_growth_size, shm_default_size, shm_region_name, model_path,
@@ -2135,7 +2141,7 @@ main(int argc, char** argv)
21352141
catch (const PythonBackendException& pb_exception) {
21362142
LOG_INFO << "Failed to preinitialize Python stub: " << pb_exception.what();
21372143
logger.reset();
2138-
stub.reset();
2144+
Stub::DestroyInstance();
21392145
exit(1);
21402146
}
21412147

@@ -2148,7 +2154,7 @@ main(int argc, char** argv)
21482154
#endif
21492155
std::atomic<bool> background_thread_running = {true};
21502156
std::thread background_thread =
2151-
std::thread([&parent_pid, &background_thread_running, &stub, &logger] {
2157+
std::thread([stub, &parent_pid, &background_thread_running, &logger] {
21522158
// Send a dummy message after the stub process is launched to notify the
21532159
// parent process that the health thread has started.
21542160
std::unique_ptr<IPCMessage> ipc_message = IPCMessage::Create(
@@ -2180,7 +2186,7 @@ main(int argc, char** argv)
21802186

21812187
// Destroy stub and exit.
21822188
logger.reset();
2183-
stub.reset();
2189+
Stub::DestroyInstance();
21842190
exit(1);
21852191
}
21862192
}
@@ -2213,7 +2219,7 @@ main(int argc, char** argv)
22132219
// this process will no longer hold the GIL lock and destruction of the stub
22142220
// will result in segfault.
22152221
logger.reset();
2216-
stub.reset();
2222+
Stub::DestroyInstance();
22172223

22182224
return 0;
22192225
}

src/pb_stub.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ struct UtilsMessagePayload {
9696
class Stub {
9797
public:
9898
Stub() : stub_to_parent_thread_(false), parent_to_stub_thread_(false){};
99-
static std::unique_ptr<Stub>& GetOrCreateInstance();
99+
static std::shared_ptr<Stub> GetOrCreateInstance();
100+
101+
static void DestroyInstance();
100102

101103
/// Instantiate a new Python backend Stub.
102104
void Instantiate(
@@ -296,7 +298,6 @@ class Stub {
296298
std::unique_ptr<MessageQueue<uint64_t>> memory_manager_message_queue_;
297299
bool initialized_;
298300
bool finalizing_;
299-
static std::unique_ptr<Stub> stub_instance_;
300301
std::vector<std::shared_ptr<PbTensor>> gpu_tensors_;
301302
std::queue<std::unique_ptr<UtilsMessagePayload>> stub_to_parent_buffer_;
302303
std::thread stub_to_parent_queue_monitor_;

src/pb_stub_log.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -51,7 +51,7 @@ Logger::GetOrCreateInstance()
5151
void
5252
Logger::Log(const std::string& message, LogLevel level)
5353
{
54-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
54+
auto stub = Stub::GetOrCreateInstance();
5555
py::object frame = py::module_::import("inspect").attr("currentframe");
5656
py::object caller_frame = frame();
5757
py::object info = py::module_::import("inspect").attr("getframeinfo");
@@ -110,7 +110,7 @@ Logger::Log(
110110
#endif
111111
} else {
112112
// Ensure we do not create a stub instance before it has initialized
113-
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
113+
auto stub = Stub::GetOrCreateInstance();
114114
std::unique_ptr<PbLog> log_msg(new PbLog(filename, lineno, message, level));
115115
stub->EnqueueLogRequest(log_msg);
116116
}

0 commit comments

Comments
 (0)