@@ -380,26 +380,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
380
380
template <uint32_t dimensions, E_FORMAT CacheFormat>
381
381
value_type_t <CacheFormat> quantize (const hlsl::vector<hlsl::float32_t , dimensions>& value)
382
382
{
383
- auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t , dimensions> src) -> hlsl::float32_t4
383
+ using float32_tN = hlsl::vector<hlsl::float32_t , dimensions>;
384
+
385
+ auto to_vec_t4 = []<typename T>(hlsl::vector<T, dimensions> src, T padValue) -> hlsl::vector<T, 4 >
384
386
{
385
387
if constexpr (dimensions == 1 )
386
388
{
387
- return {src.x , 0 , 0 , 0 };
389
+ return {src.x , padValue, padValue, padValue };
388
390
} else if constexpr (dimensions == 2 )
389
391
{
390
- return {src.x , src.y , 0 , 0 };
392
+ return {src.x , src.y , padValue, padValue };
391
393
} else if constexpr (dimensions == 3 )
392
394
{
393
- return {src.x , src.y , src.z , 0 };
395
+ return {src.x , src.y , src.z , padValue };
394
396
} else if constexpr (dimensions == 4 )
395
397
{
396
398
return {src.x , src.y , src.z , src.w };
397
399
}
398
400
};
399
401
400
- const auto negativeMask = to_float32_t4 (lessThan (value, hlsl::vector<hlsl:: float32_t , dimensions> (0 .0f )));
402
+ const auto negativeMask = to_vec_t4 (lessThan (value, float32_tN (0 .0f )), false );
401
403
402
- const hlsl::vector<hlsl:: float32_t , dimensions> absValue = abs (value);
404
+ const float32_tN absValue = abs (value);
403
405
const auto key = Key (absValue);
404
406
405
407
constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -413,14 +415,14 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
413
415
{
414
416
const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
415
417
416
- const auto abs_fit = to_float32_t4 (abs (fit));
418
+ const auto abs_fit = to_vec_t4 (abs (fit), 0 . f );
417
419
quantized = hlsl::uint32_t4 (abs_fit.x , abs_fit.y , abs_fit.z , abs_fit.w );
418
420
419
421
insertIntoCache<CacheFormat>(key,quantized);
420
422
}
421
423
}
422
424
423
- auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
425
+ auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
424
426
{
425
427
hlsl::uint32_t4 retval;
426
428
retval.x = mask.x ? val2.x : val1.x ;
@@ -435,25 +437,26 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
435
437
436
438
// for a positive number, XORing with 0 keeps its value unchanged
437
439
// for a negative number, we XOR with all ones, which flips the bits, and then add one afterwards. Flipping the bits and then adding one (two's complement negation) turns a positive number into its negative counterpart
438
- auto restoredAsVec = quantized.getValue () ^ switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (xorflag), negativeMask);
439
- restoredAsVec += switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (1u ), negativeMask);
440
+ auto restoredAsVec = quantized.getValue () ^ select (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (xorflag), negativeMask);
441
+ restoredAsVec += (hlsl::uint32_t4 (1u ) & hlsl::uint32_t4 (negativeMask) );
440
442
441
443
return value_type_t <CacheFormat>(restoredAsVec);
442
444
}
443
445
444
446
template <uint32_t dimensions, uint32_t quantizationBits>
445
447
static inline hlsl::vector<hlsl::float32_t , dimensions> findBestFit (const hlsl::vector<hlsl::float32_t , dimensions>& value)
446
448
{
449
+ using float32_tN = hlsl::vector<hlsl::float32_t , dimensions>;
447
450
static_assert (dimensions>1u ," No point" );
448
451
static_assert (dimensions<=4u ," High Dimensions are Hard!" );
449
452
450
453
const auto vectorForDots = hlsl::normalize (value);
451
454
452
455
//
453
- hlsl::vector<hlsl:: float32_t , dimensions> fittingVector;
454
- hlsl::vector<hlsl:: float32_t , dimensions> floorOffset = {};
456
+ float32_tN fittingVector;
457
+ float32_tN floorOffset = {};
455
458
constexpr uint32_t cornerCount = (0x1u <<(dimensions-1u ))-1u ;
456
- hlsl::vector<hlsl:: float32_t , dimensions> corners[cornerCount] = {};
459
+ float32_tN corners[cornerCount] = {};
457
460
{
458
461
uint32_t maxDirCompIndex = 0u ;
459
462
for (auto i=1u ; i<dimensions; i++)
@@ -465,7 +468,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
465
468
if (maxDirectionComp < std::sqrtf (0 .9998f / float (dimensions)))
466
469
{
467
470
_NBL_DEBUG_BREAK_IF (true );
468
- return hlsl::vector<hlsl:: float32_t , dimensions> (0 .f );
471
+ return float32_tN (0 .f );
469
472
}
470
473
fittingVector = value / maxDirectionComp;
471
474
floorOffset[maxDirCompIndex] = 0 .499f ;
@@ -487,9 +490,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
487
490
}
488
491
}
489
492
490
- hlsl::vector<hlsl:: float32_t , dimensions> bestFit;
493
+ float32_tN bestFit;
491
494
float closestTo1 = -1 .f ;
492
- auto evaluateFit = [&](const hlsl::vector<hlsl:: float32_t , dimensions> & newFit) -> void
495
+ auto evaluateFit = [&](const float32_tN & newFit) -> void
493
496
{
494
497
auto newFitLen = length (newFit);
495
498
const float dp = hlsl::dot (newFit,vectorForDots) / (newFitLen);
@@ -502,7 +505,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
502
505
503
506
constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u ;
504
507
const auto test = core::vectorSIMDf (cubeHalfSize);
505
- const hlsl::vector<hlsl:: float32_t , dimensions> cubeHalfSizeND = hlsl::vector<hlsl:: float32_t , dimensions> (cubeHalfSize);
508
+ const float32_tN cubeHalfSizeND = float32_tN (cubeHalfSize);
506
509
for (uint32_t n=cubeHalfSize; n>0u ; n--)
507
510
{
508
511
// we'd use float addition in the interest of speed, to increment the loop
0 commit comments