Skip to content

Commit fb7a3ea

Browse files
authored
Avoid loop tail (#521)
1 parent 918ca64 commit fb7a3ea

File tree

1 file changed

+7
-15
lines changed

1 file changed

+7
-15
lines changed

Source/astcenc_weight_align.cpp

Lines changed: 7 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -175,24 +175,16 @@ static void compute_lowest_and_highest_weight(
175 175
// unrounded weights in a straightforward way.
176 176
vfloat min_weight(FLT_MAX);
177 177
vfloat max_weight(-FLT_MAX);
178-
unsigned int partial_weight_start = round_down_to_simd_multiple_vla(weight_count);
179-
for (unsigned int i = 0; i < partial_weight_start; i += ASTCENC_SIMD_WIDTH)
180-
{
181-
vfloat weights = loada(dec_weight_ideal_value + i);
182-
min_weight = min(min_weight, weights);
183-
max_weight = max(max_weight, weights);
184-
}
185 178

186-
if (partial_weight_start != weight_count)
179+
vint lane_id = vint::lane_id();
180+
for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
187 181
{
188-
vfloat partial_weights = loada(dec_weight_ideal_value + partial_weight_start);
189-
vmask active = vint::lane_id() < vint(weight_count - partial_weight_start);
190-
191-
vmask smaller = active & (partial_weights < min_weight);
192-
min_weight = select(min_weight, partial_weights, smaller);
182+
vmask active = lane_id < vint(weight_count);
183+
lane_id += vint(ASTCENC_SIMD_WIDTH);
193 184

194-
vmask larger = active & (partial_weights > max_weight);
195-
max_weight = select(max_weight, partial_weights, larger);
185+
vfloat weights = loada(dec_weight_ideal_value + i);
186+
min_weight = min(min_weight, select(min_weight, weights, active));
187+
max_weight = max(max_weight, select(max_weight, weights, active));
196 188
}
197 189

198 190
min_weight = hmin(min_weight);

0 commit comments

Comments
 (0)