Skip to content

Commit 8d431dc

Browse files
committed
tighten compilation tests for quantization
1 parent 2841504 commit 8d431dc

File tree

2 files changed

+9
-2
lines changed

2 files changed

+9
-2
lines changed

tests/quantization/bnb/test_4bit.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,7 @@ def quantization_config(self):
886886
components_to_quantize=["transformer", "text_encoder_2"],
887887
)
888888

889+
@require_bitsandbytes_version_greater("0.46.1")
889890
def test_torch_compile(self):
890891
torch._dynamo.config.capture_dynamic_output_shape_ops = True
891892
super().test_torch_compile()

tests/quantization/test_torch_compile_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,18 @@ def _test_torch_compile(self, torch_dtype=torch.bfloat16):
5656
pipe.transformer.compile(fullgraph=True)
5757

5858
# small resolutions to ensure speedy execution.
59-
pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
59+
with torch._dynamo.config.patch(error_on_recompile=True):
60+
pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
6061

6162
def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
6263
pipe = self._init_pipeline(self.quantization_config, torch_dtype)
6364
pipe.enable_model_cpu_offload()
64-
pipe.transformer.compile()
65+
# regional compilation is better for offloading.
66+
# see: https://pytorch.org/blog/torch-compile-and-diffusers-a-hands-on-guide-to-peak-performance/
67+
if getattr(pipe.transformer, "_repeated_blocks"):
68+
pipe.transformer.compile_repeated_blocks(fullgraph=True)
69+
else:
70+
pipe.transformer.compile()
6571

6672
# small resolutions to ensure speedy execution.
6773
pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

0 commit comments

Comments (0)