From 8d431dc967a4118168af74aae9c41f2a68764851 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Mon, 28 Jul 2025 13:27:20 +0530
Subject: [PATCH 1/3] tighten compilation tests for quantization

---
 tests/quantization/bnb/test_4bit.py            |  1 +
 tests/quantization/test_torch_compile_utils.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
index 8e2a8515c662..08c0fee43b80 100644
--- a/tests/quantization/bnb/test_4bit.py
+++ b/tests/quantization/bnb/test_4bit.py
@@ -886,6 +886,7 @@ def quantization_config(self):
             components_to_quantize=["transformer", "text_encoder_2"],
         )
 
+    @require_bitsandbytes_version_greater("0.46.1")
     def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super().test_torch_compile()
diff --git a/tests/quantization/test_torch_compile_utils.py b/tests/quantization/test_torch_compile_utils.py
index c742927646b6..91ed173fc69b 100644
--- a/tests/quantization/test_torch_compile_utils.py
+++ b/tests/quantization/test_torch_compile_utils.py
@@ -56,12 +56,18 @@ def _test_torch_compile(self, torch_dtype=torch.bfloat16):
         pipe.transformer.compile(fullgraph=True)
 
         # small resolutions to ensure speedy execution.
-        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        with torch._dynamo.config.patch(error_on_recompile=True):
+            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
     def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
         pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         pipe.enable_model_cpu_offload()
-        pipe.transformer.compile()
+        # prefer regional compilation with offloading; otherwise fall back to compiling the whole model.
+        # see: https://pytorch.org/blog/torch-compile-and-diffusers-a-hands-on-guide-to-peak-performance/
+        if getattr(pipe.transformer, "_repeated_blocks", None):
+            pipe.transformer.compile_repeated_blocks(fullgraph=True)
+        else:
+            pipe.transformer.compile()
 
         # small resolutions to ensure speedy execution.
         pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

From 16acec5c2c396c042df9532c4fe1aa5fbd6a16bd Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Mon, 28 Jul 2025 14:59:52 +0530
Subject: [PATCH 2/3] mark bnb int8 test_torch_compile as xfail

---
 tests/quantization/bnb/test_mixed_int8.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
index 64f56b02b0dd..8020fcfe206f 100644
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -847,6 +847,10 @@ def quantization_config(self):
             components_to_quantize=["transformer", "text_encoder_2"],
         )
 
+    @pytest.mark.xfail(
+        reason="Test fails because of an offloading problem from Accelerate with confusion in hooks."
+        " Test passes without recompilation context manager."
+    )
     def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile(torch_dtype=torch.float16)

From ff8e68dddce100e340428174506e140fe30c1995 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 30 Jul 2025 14:23:49 +0530
Subject: [PATCH 3/3] link PR discussion in int8 test_torch_compile xfail reason

---
 tests/quantization/bnb/test_mixed_int8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
index 8020fcfe206f..8ddbf11cfd62 100644
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -849,7 +849,7 @@ def quantization_config(self):
 
     @pytest.mark.xfail(
         reason="Test fails because of an offloading problem from Accelerate with confusion in hooks."
-        " Test passes without recompilation context manager."
+        " Test passes without recompilation context manager. Refer to https://github.com/huggingface/diffusers/pull/12002/files#r2240462757 for details."
     )
     def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True