[Ref Mode] Make Tile APIs work in ref eager mode

yf225 · yf225 · commit 3af8a829102a · 2025-07-26T14:01:19.000-07:00
stack-info: PR: #378, branch: yf225/stack/40
diff --git a/helion/language/creation_ops.py b/helion/language/creation_ops.py
@@ -150,7 +150,16 @@ def _(
     value: float,
     dtype: torch.dtype = torch.float32,
 ) -> torch.Tensor:
-    processed_shape = [s.stop - s.start if isinstance(s, slice) else s for s in shape]
+    from .tile_proxy import RefTile
+    processed_shape = []
+    for s in shape:
+        if isinstance(s, RefTile):
+            # RefTile subclasses torch.Tensor (it is not a slice) and exposes a block_size property
+            processed_shape.append(s.block_size)
+        elif isinstance(s, slice):
+            processed_shape.append(s.stop - s.start)
+        else:
+            processed_shape.append(s)
     return torch.full(processed_shape, value, dtype=dtype, device="cuda")
 
 
diff --git a/helion/language/loops.py b/helion/language/loops.py
@@ -44,6 +44,7 @@
 from ..autotuner.config_spec import StaticRangeSpec
 from . import _decorators
 from .tile_proxy import Tile
+from .tile_proxy import RefTile
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -455,7 +456,7 @@ def _(
     begin_or_end: int | torch.Tensor | list[int | torch.Tensor],
     end_or_none: int | torch.Tensor | list[int | torch.Tensor] | None = None,
     block_size: int | torch.Tensor | list[int | torch.Tensor] | None = None,
-) -> Iterator[slice | tuple[slice, ...]]:
+) -> Iterator[RefTile | tuple[RefTile, ...]]:
     # Convert tensor values to int
     def _to_int(value):
         if value is None:
@@ -511,14 +512,14 @@ def _normalize_to_list(
         e = end_list[0]
         bs = block_size_list[0]
         for i in range(b, e, bs):
-            yield slice(i, min(i + bs, e))
+            yield RefTile(i, min(i + bs, e))
     else:
         # Handle multi-dimensional case
         ranges = []
         for b, e, bs in zip(begin_list, end_list, block_size_list, strict=False):
             dim_ranges = []
             for i in range(b, e, bs):
-                dim_ranges.append(slice(i, min(i + bs, e)))
+                dim_ranges.append(RefTile(i, min(i + bs, e)))
             ranges.append(dim_ranges)
 
         for combo in itertools.product(*ranges):
diff --git a/helion/language/memory_ops.py b/helion/language/memory_ops.py
@@ -85,7 +85,13 @@ def _handle_mixed_indices(
     for i, idx in enumerate(indices):
         if isinstance(idx, slice):
             # Handle slice indices
-            shape_size = idx.stop - idx.start
+            if idx.start is None and idx.stop is None:
+                # Full slice like `:` 
+                shape_size = tensor_shape[i] if i < len(tensor_shape) else 1
+            else:
+                start = idx.start or 0
+                stop = idx.stop or (tensor_shape[i] if i < len(tensor_shape) else 1)
+                shape_size = stop - start
             expected_shape.append(shape_size)
             actual_indices.append(idx)
         elif isinstance(idx, torch.Tensor):
@@ -204,6 +210,16 @@ def _(
     value: torch.Tensor,
     extra_mask: torch.Tensor | None = None,
 ) -> None:
+    # Convert RefTile objects to slices
+    from .tile_proxy import RefTile
+    processed_indices = []
+    for idx in indices:
+        if isinstance(idx, RefTile):
+            processed_indices.append(idx._slice)
+        else:
+            processed_indices.append(idx)
+    indices = processed_indices
+    
     normalized_indices = _normalize_indices(indices)
 
     if extra_mask is not None:
@@ -269,6 +285,16 @@ def _(
     other = 0
 
     assert isinstance(indices, (list, tuple))
+    
+    # Convert RefTile objects to slices
+    from .tile_proxy import RefTile
+    processed_indices = []
+    for idx in indices:
+        if isinstance(idx, RefTile):
+            processed_indices.append(idx._slice)
+        else:
+            processed_indices.append(idx)
+    indices = processed_indices
 
     # Case 1: Single tensor index (jagged indexing)
     if len(indices) == 1 and isinstance(indices[0], torch.Tensor):
@@ -400,6 +426,16 @@ def _(
     value: torch.Tensor | float,
     sem: str = "relaxed",
 ) -> None:
+    # Convert RefTile objects to slices
+    from .tile_proxy import RefTile
+    processed_indices = []
+    for idx in indices:
+        if isinstance(idx, RefTile):
+            processed_indices.append(idx._slice)
+        else:
+            processed_indices.append(idx)
+    indices = processed_indices
+    
     # Special handling for scatter-add pattern (`tensor[tensor_idx, slice] += value`)
     if isinstance(indices, (list, tuple)) and len(indices) == 2:
         idx0, idx1 = indices
diff --git a/helion/language/tile_ops.py b/helion/language/tile_ops.py
@@ -49,9 +49,16 @@ def _(state: CodegenState) -> ast.AST:
 
 
 @_decorators.ref(tile_index)
-def _(tile: slice) -> torch.Tensor:
+def _(tile: slice | int) -> torch.Tensor:
     # Handle different tile representations in ref mode
-    return torch.arange(tile.start, tile.stop, dtype=torch.int64, device="cuda")
+    from .tile_proxy import RefTile
+    if isinstance(tile, RefTile):
+        return tile.index
+    elif isinstance(tile, slice):
+        return torch.arange(tile.start, tile.stop, dtype=torch.int64, device="cuda")
+    else:
+        # tiles_as_sizes=True means we get an int
+        return torch.arange(0, tile, dtype=torch.int64, device="cuda")
 
 
 @_decorators.api(tiles_as_sizes=True)
@@ -91,7 +98,10 @@ def _(state: CodegenState) -> ast.AST:
 @_decorators.ref(tile_begin)
 def _(tile: int | slice) -> int:
     # Handle different tile representations in ref mode
-    if isinstance(tile, slice):
+    from .tile_proxy import RefTile
+    if isinstance(tile, RefTile):
+        return tile.begin
+    elif isinstance(tile, slice):
         return tile.start
     # In ref mode with tiles_as_sizes=True, we lost the begin info
     # This is a limitation - we return 0 as we don't know the actual begin
@@ -140,7 +150,10 @@ def _(state: CodegenState) -> ast.AST:
 @_decorators.ref(tile_end)
 def _(tile: int | slice) -> int:
     # Handle different tile representations in ref mode
-    if isinstance(tile, slice):
+    from .tile_proxy import RefTile
+    if isinstance(tile, RefTile):
+        return tile.end
+    elif isinstance(tile, slice):
         return tile.stop
     # In ref mode with tiles_as_sizes=True, we get the size
     # We lost the begin info, so we assume end = size
@@ -168,7 +181,10 @@ def _(tile: torch.SymInt) -> torch.SymInt:
 @_decorators.ref(tile_block_size)
 def _(tile: int | slice) -> int:
     # Handle different tile representations in ref mode
-    if isinstance(tile, slice):
+    from .tile_proxy import RefTile
+    if isinstance(tile, RefTile):
+        return tile.block_size
+    elif isinstance(tile, slice):
         return tile.stop - tile.start
     # In ref mode with tiles_as_sizes=True, the tile IS the size
     return tile
@@ -206,5 +222,8 @@ def _(state: CodegenState) -> ast.AST:
 @_decorators.ref(tile_id)
 def _(tile: int | slice) -> int:
     # tile_id is the index of the tile in the grid
+    from .tile_proxy import RefTile
+    if isinstance(tile, RefTile):
+        return tile.id
     # For ref mode we don't have the original block_size, so we return 0
     return 0
diff --git a/helion/language/tile_proxy.py b/helion/language/tile_proxy.py
@@ -10,6 +10,7 @@
 
 import torch
 from torch.utils._pytree import tree_map_only
+import functools
 
 from .. import exc
 from .._compiler.compile_environment import CompileEnvironment
@@ -182,3 +183,147 @@ def __enter__(self) -> Self:
 
     def __exit__(self, *args: object) -> None:
         _tls.index_calls = None
+
+
+class RefTile(torch.Tensor):
+    """
+    A torch.Tensor subclass used in reference eager mode that wraps a slice and mirrors its start/stop/step.
+    This allows tile.index and other tile operations to work properly in ref eager mode.
+    """
+    
+    def __new__(cls, start: int, stop: int, step: int | None = None):
+        # Create a tensor instance
+        instance = super().__new__(cls)
+        return instance
+    
+    def __init__(self, start: int, stop: int, step: int | None = None) -> None:
+        super().__init__()
+        # Store slice data
+        self.start = start
+        self.stop = stop
+        self.step = step
+        self._slice = slice(start, stop, step)
+        # We need to set block_id to something for compatibility
+        self.block_id = -1  # Special value for ref mode
+        
+    @property 
+    def index(self) -> torch.Tensor:
+        """Return a tensor containing the offsets for this tile."""
+        return torch.arange(
+            self.start, 
+            self.stop, 
+            dtype=torch.int64, 
+            device="cuda"
+        )
+    
+    @property
+    def begin(self) -> int:
+        """Return the start offset of this tile."""
+        return self.start
+    
+    @property
+    def end(self) -> int:
+        """Return the end offset of this tile."""
+        return self.stop
+    
+    @property
+    def block_size(self) -> int:
+        """Return the block size of this tile."""
+        return self.stop - self.start
+    
+    @property
+    def id(self) -> int:
+        """Return the id of this tile (always 0 in ref mode)."""
+        # We don't have enough info to compute the actual tile id
+        return 0
+    
+    def __repr__(self) -> str:
+        return f"RefTile({self._slice!r})"
+    
+    def __int__(self) -> int:
+        """Convert to int for cases where a size is expected."""
+        return self.block_size
+    
+    # Mirror slice.indices() by delegating to the wrapped slice
+    def indices(self, length: int) -> tuple[int, int, int]:
+        """Return (start, stop, step) tuple, like slice.indices()."""
+        return self._slice.indices(length)
+    
+    def __eq__(self, other: object) -> bool:
+        """Compare with other RefTile or slice objects."""
+        if isinstance(other, RefTile):
+            return self._slice == other._slice
+        elif isinstance(other, slice):
+            return self._slice == other
+        return False
+    
+    def __hash__(self) -> int:
+        """Hash based on the slice."""
+        return hash(self._slice)
+    
+    def __index__(self) -> int:
+        """Convert to int for use in tensor indexing.
+        
+        This is called when RefTile is used in advanced indexing contexts.
+        Only single-element tiles (block_size == 1) convert, yielding start; larger tiles raise TypeError.
+        """
+        # For single-element access (when block_size=1), return the index
+        if self.block_size == 1:
+            return self.start
+        # For larger tiles, we can't meaningfully convert to a single index
+        # This might happen in user lambdas trying to do advanced indexing
+        raise TypeError(f"Cannot convert RefTile with block_size={self.block_size} to index")
+    
+    @classmethod
+    def __torch_function__(
+        cls,
+        func: Callable[..., object],
+        types: object,
+        args: tuple[object, ...] = (),
+        kwargs: dict[str, object] | None = None,
+    ) -> object:
+        from ..language.memory_ops import load
+        from ..language.memory_ops import store
+
+        if func is torch.Tensor.__getitem__:
+            if len(args) != 2 or kwargs:
+                raise exc.IncorrectTileUsage(func)
+            tensor, index = args
+            assert isinstance(tensor, torch.Tensor)
+            
+            # If a single RefTile is used as index, we want to use it as a slice
+            # e.g., tensor[ref_tile] should behave like tensor[ref_tile._slice]
+            if isinstance(index, RefTile):
+                return tensor[index._slice]
+            
+            # For multi-dimensional indexing (including lists)
+            return load(tensor, cls._prepare_index(index))
+            
+        if func is torch.Tensor.__setitem__:
+            if len(args) != 3 or kwargs:
+                raise exc.IncorrectTileUsage(func)
+            tensor, index, value = args
+            assert isinstance(tensor, torch.Tensor)
+            assert isinstance(value, torch.Tensor)
+            
+            # Similar handling for setitem
+            if isinstance(index, RefTile):
+                tensor[index._slice] = value
+                return None
+                
+            return store(tensor, cls._prepare_index(index), value)
+            
+        if func is torch.Tensor.__format__:
+            return repr(args[0])
+        raise exc.IncorrectTileUsage(func)
+    
+    @staticmethod
+    def _prepare_index(index: object) -> list[object]:
+        if isinstance(index, (list, tuple)):
+            # When indexing with a list of RefTiles like bias[[tile_m, tile_n]],
+            # we want it to be interpreted as bias[tile_m, tile_n]
+            # So we return its elements as a new list for multi-dimensional indexing
+            return [*index]
+        assert isinstance(index, RefTile)
+        return [index]
+    
diff --git a/test/ref_utils.py b/test/ref_utils.py
diff --git a/test/test_ref_eager.py b/test/test_ref_eager.py