Skip to content

Commit a2c5a64

Browse files
committed
If all neighbors are outside of the filter distance threshold, zero gradients should be returned
1 parent 324b82e commit a2c5a64

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

GAF_microbatch_pytorch/GAF.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def filter_gradients_by_agreement(
7373
else:
7474
raise ValueError(f'unknown strategy {strategy}')
7575

76-
if not accept_mask.any():
76+
if accept_mask.sum().item() <= 1:
7777
return torch.zeros_like(grads)
7878

7979
if accept_mask.all():

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "GAF-microbatch-pytorch"
3-
version = "0.0.4"
3+
version = "0.0.5"
44
description = "Gradient Agreement Filtering"
55
authors = [
66
{ name = "Phil Wang", email = "lucidrains@gmail.com" }

tests/test_gaf.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from GAF_microbatch_pytorch import GAFWrapper, set_filter_gradients_
99

10-
def test_gaf():
10+
def test_unfiltered_gaf():
1111

1212
net = nn.Sequential(
1313
nn.Linear(512, 256),
@@ -47,7 +47,7 @@ def test_gaf():
4747

4848
gaf_net = GAFWrapper(
4949
deepcopy(net),
50-
filter_distance_thres = 0.
50+
filter_distance_thres = 0.7
5151
)
5252

5353
x = torch.randn(8, 1024, 512)
@@ -65,4 +65,27 @@ def test_gaf():
6565
grad = net[0].weight.grad
6666
grad_filtered = gaf_net.net[0].weight.grad
6767

68-
assert not torch.allclose(grad, grad_filtered, atol = 1e-6)
68+
assert not (grad_filtered == 0.).all() and not torch.allclose(grad, grad_filtered, atol = 1e-6)
69+
70+
def test_all_filtered_gaf():
71+
72+
net = nn.Sequential(
73+
nn.Linear(512, 256),
74+
nn.SiLU(),
75+
nn.Linear(256, 128)
76+
)
77+
78+
gaf_net = GAFWrapper(
79+
deepcopy(net),
80+
filter_distance_thres = 0.
81+
)
82+
83+
x = torch.randn(8, 1024, 512)
84+
x.requires_grad_()
85+
86+
out = gaf_net(x)
87+
out.sum().backward()
88+
89+
grad_filtered = gaf_net.net[0].weight.grad
90+
91+
assert (grad_filtered == 0.).all()

0 commit comments

Comments
 (0)