[Backend Tester] Add SNR validation (pytorch#12924)

GregoryComer · GregoryComer · commit 26017498b628 · 2025-07-30T23:26:53.000-07:00
Add SNR validation for model outputs. This is generally more robust
than tight element-wise tolerances, as it takes the entire tensor into
account. I'm relaxing atol to handle outliers and reduce noise.
diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py
@@ -1,3 +1,4 @@
+import math
 import random
 from collections import Counter, OrderedDict
 from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -18,6 +19,7 @@
     ToExecutorch,
 )
 from executorch.exir.dim_order_utils import get_memory_format
+from torch.ao.ns.fx.utils import compute_sqnr
 
 from torch.export import ExportedProgram
 from torch.testing import FileCheck
@@ -304,13 +306,14 @@ def run_method_and_compare_outputs(
         rtol=1e-03,
         qtol=0,
         statistics_callback: Callable[[ErrorStatistics], None] | None = None,
+        snr: float | None = None,
     ):
         number_of_runs = 1 if inputs is not None else num_runs
         reference_stage = self.stages[StageType.EXPORT]
 
         stage = stage or self.cur
 
-        for _ in range(number_of_runs):
+        for run_iteration in range(number_of_runs):
             inputs_to_run = inputs if inputs else next(self.generate_random_inputs())
 <<<<<<< HEAD
             input_shapes = [
@@ -338,6 +341,7 @@ def run_method_and_compare_outputs(
                 atol,
                 rtol,
                 qtol,
+                snr,
                 statistics_callback,
             )
 
@@ -349,6 +353,7 @@ def _assert_outputs_equal(
         ref_output,
         atol=1e-03,
         rtol=1e-03,
+        snr: float | None = None,
         statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         """
@@ -380,15 +385,22 @@ def _assert_outputs_equal(
                     f"\tMismatched count: {(model != ref).sum().item()} / {model.numel()}\n"
                 )
             else:
-                assert torch.allclose(
-                    model,
-                    ref,
-                    atol=atol,
-                    rtol=rtol,
-                    equal_nan=True,
+                computed_snr = compute_sqnr(model.to(torch.float), ref.to(torch.float))
+                snr = snr or float("-inf")
+
+                assert (
+                    torch.allclose(
+                        model,
+                        ref,
+                        atol=atol,
+                        rtol=rtol,
+                        equal_nan=True,
+                    )
+                    and computed_snr >= snr
+                    or math.isnan(computed_snr)
                 ), (
                     f"Output {i} does not match reference output.\n"
-                    f"\tGiven atol: {atol}, rtol: {rtol}.\n"
+                    f"\tGiven atol: {atol}, rtol: {rtol}, snr: {snr}.\n"
                     f"\tOutput tensor shape: {model.shape}, dtype: {model.dtype}\n"
                     f"\tDifference: max: {torch.max(model-ref)}, abs: {torch.max(torch.abs(model-ref))}, mean abs error: {torch.mean(torch.abs(model-ref).to(torch.double))}.\n"
                     f"\t-- Model vs. Reference --\n"
@@ -397,6 +409,7 @@ def _assert_outputs_equal(
                     f"\t  Mean: {model.to(torch.double).mean()}, {ref.to(torch.double).mean()}\n"
                     f"\t   Max: {model.max()}, {ref.max()}\n"
                     f"\t   Min: {model.min()}, {ref.min()}\n"
+                    f"\t   SNR: {computed_snr}\n"
                 )
 
     @staticmethod
@@ -407,6 +420,7 @@ def _compare_outputs(
         atol=1e-03,
         rtol=1e-03,
         qtol=0,
+        snr: float | None = None,
         statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         """
@@ -430,6 +444,7 @@ def _compare_outputs(
             reference_output,
             atol=atol,
             rtol=rtol,
+            snr=snr,
             statistics_callback=statistics_callback,
         )
 
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
@@ -136,7 +136,10 @@ def build_result(
         # AssertionErrors to catch output mismatches, but this might catch more than that.
         try:
             tester.run_method_and_compare_outputs(
-                inputs=None if generate_random_test_inputs else inputs
+                inputs=None if generate_random_test_inputs else inputs,
+                atol=5e-2,
+                rtol=5e-2,
+                snr=40,
                 statistics_callback=lambda stats: error_statistics.append(stats)
             )
         except AssertionError as e: