From 6bd0214a47ba276049562ac1d610373daee93cdf Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 12:31:23 -0400 Subject: [PATCH 01/10] PYTHON-5794 - Add prose tests to verify correct retry behavior when a mix of overload and non-overload errors are encountered --- pymongo/asynchronous/mongo_client.py | 10 ++- pymongo/synchronous/mongo_client.py | 10 ++- test/asynchronous/test_retryable_reads.py | 71 +++++++++++++++++++++- test/asynchronous/test_retryable_writes.py | 53 ++++++++++++++++ test/test_retryable_reads.py | 71 +++++++++++++++++++++- test/test_retryable_writes.py | 53 ++++++++++++++++ 6 files changed, 262 insertions(+), 6 deletions(-) diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 03e2d6073a..412a13ec70 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -2779,7 +2779,7 @@ def __init__( self._last_error: Optional[Exception] = None self._retrying = False self._always_retryable = False - self._multiple_retries = _csot.get_timeout() is not None + self._max_retries = float("inf") if _csot.get_timeout() is not None else 1 self._client = mongo_client self._retry_policy = mongo_client._retry_policy self._func = func @@ -2852,6 +2852,8 @@ async def run(self) -> T: # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) overloaded = exc.has_error_label("SystemOverloadedError") + if overloaded: + self._max_retries = self._client.options.max_adaptive_retries always_retryable = exc.has_error_label("RetryableError") and overloaded if not self._client.options.retry_reads or ( not always_retryable @@ -2890,6 +2892,8 @@ async def run(self) -> T: exc_to_check = exc.error retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") + if overloaded: + self._max_retries = self._client.options.max_adaptive_retries always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded # Always retry abortTransaction and commitTransaction up to once @@ -2943,7 +2947,9 @@ async def run(self) -> T: def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" - return not self._retryable or (self._is_retrying() and not self._multiple_retries) + return not self._retryable or ( + self._is_retrying() and self._attempt_number >= self._max_retries + ) def _is_retrying(self) -> bool: """Checks if the exchange is currently undergoing a retry""" diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index c049dcaeae..2bd6f31b72 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -2769,7 +2769,7 @@ def __init__( self._last_error: Optional[Exception] = None self._retrying = False self._always_retryable = False - self._multiple_retries = _csot.get_timeout() is not None + self._max_retries = float("inf") if _csot.get_timeout() is not None else 1 self._client = mongo_client self._retry_policy = mongo_client._retry_policy self._func = func @@ -2842,6 +2842,8 @@ def run(self) -> T: # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) overloaded = exc.has_error_label("SystemOverloadedError") + if overloaded: + self._max_retries = self._client.options.max_adaptive_retries always_retryable = exc.has_error_label("RetryableError") and overloaded if not self._client.options.retry_reads or ( not always_retryable @@ -2880,6 +2882,8 @@ def run(self) -> T: exc_to_check = exc.error retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") + if overloaded: + self._max_retries = self._client.options.max_adaptive_retries always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded # Always retry abortTransaction and commitTransaction up to once @@ -2933,7 +2937,9 @@ def run(self) -> T: def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" - return not self._retryable or (self._is_retrying() and not self._multiple_retries) + return not self._retryable or ( + self._is_retrying() and self._attempt_number >= self._max_retries + ) def _is_retrying(self) -> bool: """Checks if the exchange is currently undergoing a retry""" diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 259cd9cff5..20f008b73d 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -21,7 +21,9 @@ import threading from test.asynchronous.utils import async_set_fail_point -from pymongo.errors import OperationFailure +from pymongo import AsyncMongoClient, MongoClient +from pymongo.common import MAX_ADAPTIVE_RETRIES +from pymongo.errors import OperationFailure, PyMongoError sys.path[0:0] = [""] @@ -38,6 +40,7 @@ ) from pymongo.monitoring import ( + CommandFailedEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, ConnectionCheckOutFailedReason, @@ -145,6 +148,20 @@ async def test_pool_paused_error_is_retryable(self): class TestRetryableReads(AsyncIntegrationTest): + async def asyncSetUp(self) -> None: + await super().asyncSetUp() + self.setup_client = MongoClient(**async_client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. + def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + @async_client_context.require_multiple_mongoses @async_client_context.require_failCommand_fail_point async def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self): @@ -383,6 +400,58 @@ async def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_th # 6. Assert that both events occurred on the same server. assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id + @async_client_context.require_failCommand_fail_point + @async_client_context.require_version_min(4, 4, 0) + async def test_overload_then_nonoverload_retries_increased_reads(self) -> None: + # Create a client. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label. + non_overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 91, + "errorLabels": ["RetryableError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(non_overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(event_listeners=[listener]) + await client.test.test.insert_one({}) + + self.configure_fail_point_sync(overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + + with self.assertRaises(PyMongoError): + await client.test.test.find_one() + + started_finds = [e for e in listener.started_events if e.command_name == "find"] + self.assertEqual(len(started_finds), MAX_ADAPTIVE_RETRIES + 1) + if __name__ == "__main__": unittest.main() diff --git a/test/asynchronous/test_retryable_writes.py b/test/asynchronous/test_retryable_writes.py index 6e2072a2ad..c8af7c2786 100644 --- a/test/asynchronous/test_retryable_writes.py +++ b/test/asynchronous/test_retryable_writes.py @@ -22,6 +22,8 @@ import threading from test.asynchronous.utils import async_set_fail_point, flaky +from pymongo.common import MAX_ADAPTIVE_RETRIES + sys.path[0:0] = [""] from test.asynchronous import ( @@ -784,6 +786,57 @@ def failed(event: CommandFailedEvent) -> None: # Assert that the error does not contain the error label `NoWritesPerformed`. assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + @async_client_context.require_failCommand_fail_point + @async_client_context.require_version_min(4, 4, 0) + async def test_overload_then_nonoverload_retries_increased_writes(self) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and `RetryableWriteError` error labels. + non_overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "RetryableWriteError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(non_overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + + with self.assertRaises(PyMongoError): + await client.test.test.insert_one({"x": 1}) + + started_inserts = [e for e in listener.started_events if e.command_name == "insert"] + self.assertEqual(len(started_inserts), MAX_ADAPTIVE_RETRIES + 1) + if __name__ == "__main__": unittest.main() diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 9e6aac821c..52f5924d0e 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -21,7 +21,9 @@ import threading from test.utils import set_fail_point -from pymongo.errors import OperationFailure +from pymongo import MongoClient +from pymongo.common import MAX_ADAPTIVE_RETRIES +from pymongo.errors import OperationFailure, PyMongoError sys.path[0:0] = [""] @@ -38,6 +40,7 @@ ) from pymongo.monitoring import ( + CommandFailedEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, ConnectionCheckOutFailedReason, @@ -145,6 +148,20 @@ def test_pool_paused_error_is_retryable(self): class TestRetryableReads(IntegrationTest): + def setUp(self) -> None: + super().setUp() + self.setup_client = MongoClient(**client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. + def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + @client_context.require_multiple_mongoses @client_context.require_failCommand_fail_point def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self): @@ -381,6 +398,58 @@ def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_the_same # 6. Assert that both events occurred on the same server. assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id + @client_context.require_failCommand_fail_point + @client_context.require_version_min(4, 4, 0) + def test_overload_then_nonoverload_retries_increased_reads(self) -> None: + # Create a client. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label. + non_overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 91, + "errorLabels": ["RetryableError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(non_overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(event_listeners=[listener]) + client.test.test.insert_one({}) + + self.configure_fail_point_sync(overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + + with self.assertRaises(PyMongoError): + client.test.test.find_one() + + started_finds = [e for e in listener.started_events if e.command_name == "find"] + self.assertEqual(len(started_finds), MAX_ADAPTIVE_RETRIES + 1) + if __name__ == "__main__": unittest.main() diff --git a/test/test_retryable_writes.py b/test/test_retryable_writes.py index 5509083162..00469f198a 100644 --- a/test/test_retryable_writes.py +++ b/test/test_retryable_writes.py @@ -22,6 +22,8 @@ import threading from test.utils import flaky, set_fail_point +from pymongo.common import MAX_ADAPTIVE_RETRIES + sys.path[0:0] = [""] from test import ( @@ -780,6 +782,57 @@ def failed(event: CommandFailedEvent) -> None: # Assert that the error does not contain the error label `NoWritesPerformed`. assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + @client_context.require_failCommand_fail_point + @client_context.require_version_min(4, 4, 0) + def test_overload_then_nonoverload_retries_increased_writes(self) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and `RetryableWriteError` error labels. + non_overload_fail_point = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "RetryableWriteError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(non_overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(overload_fail_point) + self.addCleanup(self.configure_fail_point_sync, {}, off=True) + + with self.assertRaises(PyMongoError): + client.test.test.insert_one({"x": 1}) + + started_inserts = [e for e in listener.started_events if e.command_name == "insert"] + self.assertEqual(len(started_inserts), MAX_ADAPTIVE_RETRIES + 1) + if __name__ == "__main__": unittest.main() From ed52a0cff1354926d9c6e127640f74ec089f9223 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 12:43:25 -0400 Subject: [PATCH 02/10] Fix typing --- test/asynchronous/test_retryable_reads.py | 2 +- test/asynchronous/test_retryable_writes.py | 2 -- test/test_retryable_reads.py | 2 +- test/test_retryable_writes.py | 2 -- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 20f008b73d..1d0e92c270 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -401,7 +401,7 @@ async def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_th assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id @async_client_context.require_failCommand_fail_point - @async_client_context.require_version_min(4, 4, 0) + @async_client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator] async def test_overload_then_nonoverload_retries_increased_reads(self) -> None: # Create a client. listener = OvertCommandListener() diff --git a/test/asynchronous/test_retryable_writes.py b/test/asynchronous/test_retryable_writes.py index c8af7c2786..2b5380b0e3 100644 --- a/test/asynchronous/test_retryable_writes.py +++ b/test/asynchronous/test_retryable_writes.py @@ -786,8 +786,6 @@ def failed(event: CommandFailedEvent) -> None: # Assert that the error does not contain the error label `NoWritesPerformed`. assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] - @async_client_context.require_failCommand_fail_point - @async_client_context.require_version_min(4, 4, 0) async def test_overload_then_nonoverload_retries_increased_writes(self) -> None: # Create a client with retryWrites=true. listener = OvertCommandListener() diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 52f5924d0e..34423c0185 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -399,7 +399,7 @@ def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_the_same assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id @client_context.require_failCommand_fail_point - @client_context.require_version_min(4, 4, 0) + @client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator] def test_overload_then_nonoverload_retries_increased_reads(self) -> None: # Create a client. listener = OvertCommandListener() diff --git a/test/test_retryable_writes.py b/test/test_retryable_writes.py index 00469f198a..9c212527e9 100644 --- a/test/test_retryable_writes.py +++ b/test/test_retryable_writes.py @@ -782,8 +782,6 @@ def failed(event: CommandFailedEvent) -> None: # Assert that the error does not contain the error label `NoWritesPerformed`. assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] - @client_context.require_failCommand_fail_point - @client_context.require_version_min(4, 4, 0) def test_overload_then_nonoverload_retries_increased_writes(self) -> None: # Create a client with retryWrites=true. listener = OvertCommandListener() From 10d709bda097e79d23869d045ea18fd094a6b700 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:42:00 -0400 Subject: [PATCH 03/10] Update test/asynchronous/test_retryable_writes.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/asynchronous/test_retryable_writes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/asynchronous/test_retryable_writes.py b/test/asynchronous/test_retryable_writes.py index 2b5380b0e3..d21927abcd 100644 --- a/test/asynchronous/test_retryable_writes.py +++ b/test/asynchronous/test_retryable_writes.py @@ -791,7 +791,7 @@ async def test_overload_then_nonoverload_retries_increased_writes(self) -> None: listener = OvertCommandListener() # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error - # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + # code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels. overload_fail_point = { "configureFailPoint": "failCommand", "mode": {"times": 1}, From 4ff01169f5a37358b1e7317ea772fab23e33fc1c Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:42:13 -0400 Subject: [PATCH 04/10] Update test/asynchronous/test_retryable_reads.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/asynchronous/test_retryable_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 1d0e92c270..79e6660d47 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -407,7 +407,7 @@ async def test_overload_then_nonoverload_retries_increased_reads(self) -> None: listener = OvertCommandListener() # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error - # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + # code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels. overload_fail_point = { "configureFailPoint": "failCommand", "mode": {"times": 1}, From 9cce4c92f9ec660f3b3f45a4b70612bdf022bb9d Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:42:30 -0400 Subject: [PATCH 05/10] Update test/test_retryable_reads.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/test_retryable_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 34423c0185..50c334e965 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -405,7 +405,7 @@ def test_overload_then_nonoverload_retries_increased_reads(self) -> None: listener = OvertCommandListener() # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error - # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + # code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels. overload_fail_point = { "configureFailPoint": "failCommand", "mode": {"times": 1}, From f76e12dfae3b97043c16c9df74537343ad9b462b Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:42:38 -0400 Subject: [PATCH 06/10] Update test/test_retryable_writes.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/test_retryable_writes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_retryable_writes.py b/test/test_retryable_writes.py index 9c212527e9..ce0570dd48 100644 --- a/test/test_retryable_writes.py +++ b/test/test_retryable_writes.py @@ -787,7 +787,7 @@ def test_overload_then_nonoverload_retries_increased_writes(self) -> None: listener = OvertCommandListener() # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error - # code `91` (NotWritablePrimary) and `RetryableError` and `SystemOverloadedError` labels. + # code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels. overload_fail_point = { "configureFailPoint": "failCommand", "mode": {"times": 1}, From ac6d114a3c15eb0198cbf175b1d540d1d26e71af Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:42:50 -0400 Subject: [PATCH 07/10] Update test/asynchronous/test_retryable_reads.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/asynchronous/test_retryable_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 79e6660d47..33a51b68bf 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -21,7 +21,7 @@ import threading from test.asynchronous.utils import async_set_fail_point -from pymongo import AsyncMongoClient, MongoClient +from pymongo import MongoClient from pymongo.common import MAX_ADAPTIVE_RETRIES from pymongo.errors import OperationFailure, PyMongoError From bf5ea70562a717dcc139726b613c73a350b808d2 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:43:01 -0400 Subject: [PATCH 08/10] Update test/test_retryable_reads.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/test_retryable_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 50c334e965..4cc7753f8d 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -150,7 +150,7 @@ def test_pool_paused_error_is_retryable(self): class TestRetryableReads(IntegrationTest): def setUp(self) -> None: super().setUp() - self.setup_client = MongoClient(**client_context.default_client_options) + self.setup_client = MongoClient(**client_context.client_options) self.addCleanup(self.setup_client.close) # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. From e738bde2325ad83aaa70db8ea4d6ced23038d03b Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 13:43:15 -0400 Subject: [PATCH 09/10] Update test/asynchronous/test_retryable_reads.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/asynchronous/test_retryable_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 33a51b68bf..42c1175070 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -150,7 +150,7 @@ async def test_pool_paused_error_is_retryable(self): class TestRetryableReads(AsyncIntegrationTest): async def asyncSetUp(self) -> None: await super().asyncSetUp() - self.setup_client = MongoClient(**async_client_context.default_client_options) + self.setup_client = MongoClient(**async_client_context.client_options) self.addCleanup(self.setup_client.close) # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. From 409c626dec4733ebedd0a207f0e8c647bac7ef3d Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 14 Apr 2026 15:39:31 -0400 Subject: [PATCH 10/10] Add new prose test --- test/asynchronous/test_client_backpressure.py | 39 ++++++++++++++++++- test/test_client_backpressure.py | 39 ++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/test/asynchronous/test_client_backpressure.py b/test/asynchronous/test_client_backpressure.py index 3e75ed9b0d..da2ef0d265 100644 --- a/test/asynchronous/test_client_backpressure.py +++ b/test/asynchronous/test_client_backpressure.py @@ -19,6 +19,7 @@ import pathlib import sys from time import perf_counter +from unittest import mock from unittest.mock import patch from pymongo.common import MAX_ADAPTIVE_RETRIES @@ -228,7 +229,7 @@ async def test_01_operation_retry_uses_exponential_backoff(self, random_func): self.assertTrue(abs((end1 - start1) - (end0 - start0 + 0.3)) < 0.3) @async_client_context.require_failCommand_appName - async def test_03_overload_retries_limited(self): + async def test_02_overload_retries_limited(self): # Drivers should test that overload errors are retried a maximum of two times. # 1. Let `client` be a `MongoClient`. @@ -260,7 +261,7 @@ async def test_03_overload_retries_limited(self): self.assertEqual(len(self.listener.started_events), MAX_ADAPTIVE_RETRIES + 1) @async_client_context.require_failCommand_appName - async def test_04_overload_retries_limited_configured(self): + async def test_03_overload_retries_limited_configured(self): # Drivers should test that overload errors are retried a maximum of maxAdaptiveRetries times. max_retries = 1 @@ -294,6 +295,40 @@ async def test_04_overload_retries_limited_configured(self): # 6. Assert that the total number of started commands is max_retries + 1. self.assertEqual(len(self.listener.started_events), max_retries + 1) + @async_client_context.require_failCommand_fail_point + async def test_04_backoff_is_not_applied_for_non_overload_errors(self): + # Drivers should test that backoff is not applied for non-overload retryable errors. + if _IS_SYNC: + mock_target = "pymongo.synchronous.helpers._RetryPolicy.backoff" + else: + mock_target = "pymongo.asynchronous.helpers._RetryPolicy.backoff" + + # 1. Let `client` be a `MongoClient`. + client = self.client + + # 2. Let `coll` be a collection. + coll = client.test.test + await coll.insert_one({}) + + # 3. Configure a failpoint with a retryable error that is NOT an overload error. + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorCode": 91, # ShutdownInProgress + "errorLabels": ["RetryableError"], + }, + } + + # 4. Perform a find operation with `coll` that succeeds on its first retry attempt. + with mock.patch(mock_target, return_value=1) as mock_backoff: + async with self.fail_point(failpoint): + await coll.find_one({}) + + # 5. Assert that no backoff was used for the retry attempt. + mock_backoff.assert_not_called() + # Location of JSON test specifications. if _IS_SYNC: diff --git a/test/test_client_backpressure.py b/test/test_client_backpressure.py index 61334a1218..091baadc7b 100644 --- a/test/test_client_backpressure.py +++ b/test/test_client_backpressure.py @@ -19,6 +19,7 @@ import pathlib import sys from time import perf_counter +from unittest import mock from unittest.mock import patch from pymongo.common import MAX_ADAPTIVE_RETRIES @@ -228,7 +229,7 @@ def test_01_operation_retry_uses_exponential_backoff(self, random_func): self.assertTrue(abs((end1 - start1) - (end0 - start0 + 0.3)) < 0.3) @client_context.require_failCommand_appName - def test_03_overload_retries_limited(self): + def test_02_overload_retries_limited(self): # Drivers should test that overload errors are retried a maximum of two times. # 1. Let `client` be a `MongoClient`. @@ -260,7 +261,7 @@ def test_03_overload_retries_limited(self): self.assertEqual(len(self.listener.started_events), MAX_ADAPTIVE_RETRIES + 1) @client_context.require_failCommand_appName - def test_04_overload_retries_limited_configured(self): + def test_03_overload_retries_limited_configured(self): # Drivers should test that overload errors are retried a maximum of maxAdaptiveRetries times. max_retries = 1 @@ -292,6 +293,40 @@ def test_04_overload_retries_limited_configured(self): # 6. Assert that the total number of started commands is max_retries + 1. self.assertEqual(len(self.listener.started_events), max_retries + 1) + @client_context.require_failCommand_fail_point + def test_04_backoff_is_not_applied_for_non_overload_errors(self): + # Drivers should test that backoff is not applied for non-overload retryable errors. + if _IS_SYNC: + mock_target = "pymongo.synchronous.helpers._RetryPolicy.backoff" + else: + mock_target = "pymongo.helpers._RetryPolicy.backoff" + + # 1. Let `client` be a `MongoClient`. + client = self.client + + # 2. Let `coll` be a collection. + coll = client.test.test + coll.insert_one({}) + + # 3. Configure a failpoint with a retryable error that is NOT an overload error. + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorCode": 91, # ShutdownInProgress + "errorLabels": ["RetryableError"], + }, + } + + # 4. Perform a find operation with `coll` that succeeds on its first retry attempt. + with mock.patch(mock_target, return_value=1) as mock_backoff: + with self.fail_point(failpoint): + coll.find_one({}) + + # 5. Assert that no backoff was used for the retry attempt. + mock_backoff.assert_not_called() + # Location of JSON test specifications. if _IS_SYNC: