triton-inference-server · whoisj · Apr 6, 2026 · Feb 24, 2026 · Mar 2, 2026 · Mar 4, 2026
diff --git a/qa/L0_http/generate_endpoint_test.py b/qa/L0_http/generate_endpoint_test.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python3
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
 
 sys.path.append("../common")
 
+import base64
 import json
 import threading
 import time
@@ -274,6 +275,37 @@ def test_invalid_input_types(self):
             self.generate_expect_failure(self._model_name, inputs, error_msg)
             self.generate_stream_expect_failure(self._model_name, inputs, error_msg)
 
+    def test_json_dtype_size_expansion_exceeds_limit_error(self):
+        """
+        Verify that oversized base64 string inputs are rejected by generate.
+
+        A 64MB buffer is base64 encoded and sent as a JSON string input. Per
+        the scenario under test, such a byte[] payload would expand to a much
+        larger dtype[] input on the server side when `sizeof(dtype) > 1`.
+        The request is sent once as a single "PROMPT" input and once split
+        across "INPUT0" and "INPUT1"; both must fail with an error stating
+        that the input size exceeds the maximum allowed input size.
+        This is important to prevent clients from sending inputs that could
+        cause excessive memory usage on the server.
+        """
+
+        # 64MB input, which is large but still reasonable for an HTTP request
+        # body. Built by bytes repetition so that no 64M-element Python list
+        # has to be materialized first.
+        input_bytes = b"\x01" * (64 * 1024 * 1024)
+        input_str = base64.b64encode(input_bytes).decode("utf-8")
+        inputs = {"PROMPT": input_str, "STREAM": False}
+        error_msg = " bytes exceeds the maximum allowed input size of "
+        self.generate_expect_failure(self._model_name, inputs, error_msg)
+
+        half = len(input_str) // 2
+        inputs = {
+            "INPUT0": input_str[:half],
+            "INPUT1": input_str[half:],
+            "STREAM": False,
+        }
+        self.generate_expect_failure(self._model_name, inputs, error_msg)
+
     def test_duplicate_inputs(self):
         dupe_prompt = "input 'PROMPT' already exists in request"
         dupe_stream = "input 'STREAM' already exists in request"

diff --git a/qa/L0_http/http_input_size_limit_test.py b/qa/L0_http/http_input_size_limit_test.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
 
 sys.path.append("../common")
 
+import base64
 import gzip
 import io
 import json
@@ -41,6 +42,9 @@
 # Constants for size calculations
 # Each FP32 value is 4 bytes, so we need to divide target byte sizes by 4 to get element counts
 BYTES_PER_FP32 = 4
+# INT64 elements are 8 bytes each; the type size explosion test declares its
+# inputs as INT64 and uses this to convert byte sizes to element counts.
+BYTES_PER_INT64 = 8
 MB = 2**20  # 1 MB = 1,048,576 bytes
 GB = 2**30  # 1 GB = 1,073,741,824 bytes
 DEFAULT_LIMIT_BYTES = 64 * MB  # 64MB default limit
@@ -58,7 +62,120 @@
 
 class InferSizeLimitTest(tu.TestResultCollector):
     def _get_infer_url(self, model_name):
-        return "http://localhost:8000/v2/models/{}/infer".format(model_name)
+        return f"http://localhost:8000/v2/models/{model_name}/infer"  # infer endpoint
+
+    def test_json_dtype_size_expansion_exceeds_limit_error(self):
+        """
+        Test that a JSON input whose byte[] data would expand to a larger
+        dtype[] on the server is rejected for exceeding the maximum allowed
+        input size, with an appropriate error message.
+
+        The test sends a large base64 encoded string as input data, which
+        simulates a byte[] input that would expand to a much larger dtype[]
+        input on the server side when `sizeof(dtype) > 1`. The request is
+        sent once with a single input and once split across two inputs.
+        This is important to prevent clients from sending inputs that could
+        cause excessive memory usage on the server.
+        """
+        model = "onnx_zero_1_float32"
+
+        # Provided data is 64MB of int8, declared below as INT64 (8 bytes per
+        # element). NOTE(review): the model name suggests FP32 inputs; confirm
+        # which datatype the expansion check is meant to exercise.
+        bytes_input = np.ones(DEFAULT_LIMIT_BYTES, dtype=np.int8)
+        input_bytes = bytes_input.tobytes()
+        data_str = base64.b64encode(input_bytes).decode("utf-8")
+        # NOTE(review): header carries the raw byte count, not the JSON size.
+        headers = {
+            "Content-Type": "application/json",
+            "Inference-Header-Content-Length": f"{len(input_bytes)}",
+        }
+        # INT64 element count whose byte size equals len(input_bytes).
+        shape_size = DEFAULT_LIMIT_BYTES // BYTES_PER_INT64
+
+        payload = {
+            "inputs": [
+                {
+                    "name": "INPUT0",
+                    "datatype": "INT64",
+                    "shape": [1, shape_size],
+                    "data": data_str,
+                }
+            ]
+        }
+
+        response = requests.post(
+            f"http://localhost:8000/v2/models/{model}/generate",
+            headers=headers,
+            json=payload,
+        )
+
+        self.assertEqual(
+            400,
+            response.status_code,
+            f"Expected error code for type/size mismatch, got: {response.status_code}",
+        )
+        error_msg = response.content.decode()
+        # Print the error message for debugging
+        print(f"Error message: {error_msg}", flush=True)
+        self.assertIn(
+            "Request JSON size of ",
+            error_msg,
+        )
+        self.assertIn(
+            " bytes exceeds the maximum allowed input size of ",
+            error_msg,
+        )
+        self.assertIn(
+            "Use --http-max-input-size to increase the limit.",
+            error_msg,
+        )
+
+        # Test multiple inputs with one that causes size explosion.
+        payload = {
+            "inputs": [
+                {
+                    "name": "INPUT0",
+                    "datatype": "INT64",
+                    "shape": [1, shape_size // 2],
+                    "data": data_str[: len(data_str) // 2],
+                },
+                {
+                    "name": "INPUT1",
+                    "datatype": "INT64",
+                    "shape": [1, shape_size // 2],
+                    "data": data_str[len(data_str) // 2 :],
+                },
+            ]
+        }
+
+        response = requests.post(
+            f"http://localhost:8000/v2/models/{model}/generate",
+            headers=headers,
+            json=payload,
+        )
+
+        self.assertEqual(
+            400,
+            response.status_code,
+            f"Expected error code for type/size mismatch, got: {response.status_code}",
+        )
+        error_msg = response.content.decode()
+        # Print the error message for debugging
+        print(f"Error message: {error_msg}", flush=True)
+        # NOTE(review): lowercase "request" here vs "Request" above; confirm.
+        self.assertIn(
+            "request JSON size of ",
+            error_msg,
+        )
+        self.assertIn(
+            " bytes exceeds the maximum allowed input size of ",
+            error_msg,
+        )
+        self.assertIn(
+            "Use --http-max-input-size to increase the limit.",
+            error_msg,
+        )
 
     def test_default_limit_raw_binary(self):
         """Test raw binary inputs with default limit"""
@@ -165,9 +282,16 @@ def test_default_limit_json(self):
         # Verify error message contains size limit info
         error_msg = response.content.decode()
         self.assertIn(
-            "exceeds the maximum allowed value",
+            "Request JSON size of ",
+            error_msg,
+        )
+        self.assertIn(
+            " bytes exceeds the maximum allowed input size of ",
+            error_msg,
+        )
+        self.assertIn(
+            "Use --http-max-input-size to increase the limit.",
             error_msg,
-            "Expected error message about exceeding max input size",
         )
 
         # Test case 2: Input just under the 64MB limit (should succeed)
@@ -320,9 +444,16 @@ def test_large_input_json(self):
         # Verify error message contains size limit info
         error_msg = response.content.decode()
         self.assertIn(
-            "exceeds the maximum allowed value",
+            "request JSON size of ",
+            error_msg,
+        )
+        self.assertIn(
+            " bytes exceeds the maximum allowed input size of ",
+            error_msg,
+        )
+        self.assertIn(
+            "Use --http-max-input-size to increase the limit.",
             error_msg,
-            "Expected error message about exceeding max input size",
         )
 
         # Test case 2: Input just under the 128MB configured limit (should succeed)
@@ -405,15 +536,15 @@ def test_large_string_in_json(self):
         # Verify error message
         error_msg = response.content.decode()
         self.assertIn(
-            "Request JSON size",
+            "Request JSON size of ",
             error_msg,
         )
         self.assertIn(
-            "exceeds the maximum allowed value",
+            " bytes exceeds the maximum allowed input size of ",
             error_msg,
         )
         self.assertIn(
-            "Use --http-max-input-size to increase the limit",
+            "Use --http-max-input-size to increase the limit.",
             error_msg,
         )
 

diff --git a/qa/L0_http/http_test.py b/qa/L0_http/http_test.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -368,7 +368,7 @@ def test_loading_large_invalid_model(self):
                 error_message,
             )
             self.assertIn(
-                "exceeds the maximum allowed value",
+                " exceeds the maximum allowed input size. ",
                 error_message,
             )
         except ValueError: