Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit ca01baf

Browse files
Fix performance regression
1 parent 4fe49e0 commit ca01baf

File tree

1 file changed

+16 -6 lines changed

1 file changed

+16 -6 lines changed

src/device/matmul_kernels/layout.jl

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,14 @@ struct AlignedColMajor{T} <: LayoutBase{T} end
4646
@unroll for j = 1 : size[2]
4747
@unroll for i = 1 : vec_len : size[1]
4848
t = translate(tile, (i - 1, j - 1))
49-
ind = Tuple(t.index) .+ 1
50-
@inbounds linear_index = LinearIndices(Base.size(workspace))[ind...]
51-
@inbounds res[i, j] = vloada(Vec{vec_len, T}, pointer(workspace), linear_index)
49+
50+
base = Tuple(t.base) .+ 1
51+
@inbounds linear_base = LinearIndices(Base.size(workspace))[base...]
52+
53+
offset = Tuple(t.offset) .+ 1
54+
@inbounds linear_offset = LinearIndices(Base.size(workspace))[offset...]
55+
56+
@inbounds res[i, j] = vloada(Vec{vec_len, T}, pointer(workspace, linear_base), linear_offset)
5257
end
5358
end
5459

@@ -61,9 +66,14 @@ end
6166
@unroll for j = 1 : size[2]
6267
@unroll for i = 1 : vec_len : size[1]
6368
t = translate(tile, (i - 1, j - 1))
64-
ind = Tuple(t.index) .+ 1
65-
@inbounds linear_index = LinearIndices(Base.size(workspace))[ind...]
66-
vstorea!(Vec{vec_len, T}, pointer(workspace), value[i, j], linear_index)
69+
70+
base = Tuple(t.base) .+ 1
71+
@inbounds linear_base = LinearIndices(Base.size(workspace))[base...]
72+
73+
offset = Tuple(t.offset) .+ 1
74+
@inbounds linear_offset = LinearIndices(Base.size(workspace))[offset...]
75+
76+
vstorea!(Vec{vec_len, T}, pointer(workspace, linear_base), value[i, j], linear_offset)
6777
end
6878
end
6979
end

0 commit comments

Comments (0)