Skip to content

Commit d111c65

Browse files
ivanbasov and claude committed
test(onnx): add end-to-end mq.quantize() tests for modelopt
Previous coverage only verified that modelopt.onnx.quantization was importable. Add TestModeloptQuantize with two tests that actually call mq.quantize() on a real ONNX model:

- test_mq_quantize_int8_produces_valid_onnx: verifies the output file is created and passes onnx.checker (confirms modelopt works at runtime, not just at import time — this is the key Python 3.13 regression check)
- test_mq_quantize_int8_output_differs_from_fp32: verifies QDQ nodes were inserted (output graph has more nodes than the FP32 source)

Both tests share a _build_tiny_model() helper that creates a minimal Gemm ONNX model with input "dets" and 16 calibration rows, matching the production calibration_data={"dets": calib_dets} call convention. model.ir_version is pinned to 8 for onnxruntime-gpu 1.22.0 compatibility.

Tests are skipped when nvidia-modelopt is not installed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent af466ea commit d111c65

1 file changed

Lines changed: 94 additions & 0 deletions

File tree

code/tests/test_onnx_quant_workflow.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,100 @@ def test_fp8_raises_on_modelopt_import_error(self):
283283
)
284284

285285

286+
# Import the submodule explicitly: ``__import__("importlib").util`` only works
# when some other module has already imported ``importlib.util`` as a side
# effect, and raises AttributeError otherwise.
import importlib.util

# True when a top-level ``modelopt`` package can be located on the import path.
# Only the spec is looked up; the package itself is not imported here.
_HAS_MODELOPT = importlib.util.find_spec("modelopt") is not None
287+
288+
289+
@unittest.skipUnless(_HAS_MODELOPT, "nvidia-modelopt not installed")
class TestModeloptQuantize(unittest.TestCase):
    """End-to-end tests that call mq.quantize() on a real ONNX model.

    Skipped when nvidia-modelopt is not installed.  On Python 3.13+ modelopt
    must be installed with --ignore-requires-python (done by check_python_compat.sh
    when MODE=train); these tests confirm it actually works at runtime, not just
    that the import succeeds.
    """

    def _make_workdir(self):
        """Return the path of a fresh temp directory removed when the test ends."""
        import tempfile

        workdir = tempfile.TemporaryDirectory()
        # Removing the whole directory also cleans up any extra files that end
        # up next to the model files, and never fails on a missing output file.
        self.addCleanup(workdir.cleanup)
        return workdir.name

    def _build_tiny_model(self):
        """Return (fp32_path, calib_dets) for a minimal Gemm ONNX model.

        The model is a single Gemm node with a FLOAT [1, 4] input named
        "dets", an all-ones 4x4 weight, a zero bias and a FLOAT [1, 4] output
        "Y".  It is saved into a per-test temporary directory.

        Returns:
            A tuple ``(fp32_path, calib)`` where ``fp32_path`` is the saved
            model file and ``calib`` is a (16, 4) uint8 array of 0/1 values.
        """
        import numpy as np
        import onnx
        import onnx.helper as oh

        weight_values = np.ones((4, 4), dtype=np.float32)
        bias_values = np.zeros((4,), dtype=np.float32)

        graph_input = oh.make_tensor_value_info("dets", onnx.TensorProto.FLOAT, [1, 4])
        graph_output = oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1, 4])
        weight = oh.make_tensor(
            "W", onnx.TensorProto.FLOAT, weight_values.shape, weight_values.flatten().tolist()
        )
        bias = oh.make_tensor(
            "B", onnx.TensorProto.FLOAT, bias_values.shape, bias_values.flatten().tolist()
        )
        gemm = oh.make_node("Gemm", inputs=["dets", "W", "B"], outputs=["Y"])
        graph = oh.make_graph(
            [gemm], "tiny", [graph_input], [graph_output], initializer=[weight, bias]
        )
        model = oh.make_model(graph, opset_imports=[oh.make_opsetid("", 17)])
        # Pinned rather than left at the installed onnx default; the commit that
        # introduced this test cites onnxruntime-gpu 1.22.0 compatibility.
        model.ir_version = 8
        onnx.checker.check_model(model)

        fp32_path = os.path.join(self._make_workdir(), "tiny_fp32.onnx")
        onnx.save(model, fp32_path)

        # Seeded so every run calibrates on the same rows.
        # NOTE(review): rows are uint8 while the graph input "dets" is declared
        # FLOAT -- confirm this matches what production passes to mq.quantize().
        calib = np.random.default_rng(0).integers(0, 2, size=(16, 4), dtype=np.uint8)
        return fp32_path, calib

    def _quantize_tiny_model_int8(self):
        """Quantize the tiny model to int8 and return (fp32_path, out_path).

        ``out_path`` does not exist before mq.quantize() is called, so its
        presence afterwards shows that the call wrote it.
        """
        import modelopt.onnx.quantization as mq

        fp32_path, calib = self._build_tiny_model()
        out_path = os.path.join(os.path.dirname(fp32_path), "tiny_int8.onnx")

        mq.quantize(
            onnx_path=fp32_path,
            quantize_mode="int8",
            calibration_data={"dets": calib},
            output_path=out_path,
        )
        return fp32_path, out_path

    def test_mq_quantize_int8_produces_valid_onnx(self):
        """mq.quantize(quantize_mode='int8') must write a valid ONNX file."""
        import onnx

        _, out_path = self._quantize_tiny_model_int8()

        self.assertTrue(os.path.isfile(out_path), "quantized ONNX output file not created")
        quant_model = onnx.load(out_path)
        onnx.checker.check_model(quant_model)

    def test_mq_quantize_int8_output_differs_from_fp32(self):
        """The quantized model must differ from the FP32 source (QDQ nodes added)."""
        import onnx

        fp32_path, out_path = self._quantize_tiny_model_int8()

        fp32_model = onnx.load(fp32_path)
        quant_model = onnx.load(out_path)
        # Strictly greater, not merely different: inserting QDQ pairs can only
        # add nodes, so a smaller graph would also be a failure.
        self.assertGreater(
            len(quant_model.graph.node),
            len(fp32_model.graph.node),
            "quantized model should have more nodes (QDQ pairs) than the FP32 source",
        )
378+
379+
286380
class TestModeloptPrerequisite(unittest.TestCase):
287381
"""Verify quantization package prerequisites are correctly declared."""
288382

0 commit comments

Comments
 (0)