Skip to content

Commit b152755

Browse files
author
kevyuu
committed
Fix small things
1 parent 50b6493 commit b152755

File tree

4 files changed

+50
-48
lines changed

4 files changed

+50
-48
lines changed

include/nbl/asset/IAccelerationStructure.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace nbl::asset
2323
class IAccelerationStructure : public virtual core::IReferenceCounted
2424
{
2525
public:
26-
static constexpr inline size_t TRANSFORM_DATA_MIN_ALIGNMENT = 16;
26+
static constexpr inline size_t TransformDataMinAlignment = 16;
2727

2828
// build flags, we don't expose flags that don't make sense for certain levels
2929
enum class BUILD_FLAGS : uint8_t

include/nbl/asset/utils/CDirQuantCacheBase.h

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -380,26 +380,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
380380
template<uint32_t dimensions, E_FORMAT CacheFormat>
381381
value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
382382
{
383-
auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t, dimensions> src) -> hlsl::float32_t4
383+
using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
384+
385+
auto to_vec_t4 = []<typename T>(hlsl::vector<T, dimensions> src, T padValue) -> hlsl::vector<T, 4>
384386
{
385387
if constexpr(dimensions == 1)
386388
{
387-
return {src.x, 0, 0, 0};
389+
return {src.x, padValue, padValue, padValue};
388390
} else if constexpr (dimensions == 2)
389391
{
390-
return {src.x, src.y, 0, 0};
392+
return {src.x, src.y, padValue, padValue};
391393
} else if constexpr (dimensions == 3)
392394
{
393-
return {src.x, src.y, src.z, 0};
395+
return {src.x, src.y, src.z, padValue};
394396
} else if constexpr (dimensions == 4)
395397
{
396398
return {src.x, src.y, src.z, src.w};
397399
}
398400
};
399401

400-
const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector<hlsl::float32_t, dimensions>(0.0f)));
402+
const auto negativeMask = to_vec_t4(lessThan(value, float32_tN(0.0f)), false);
401403

402-
const hlsl::vector<hlsl::float32_t, dimensions> absValue = abs(value);
404+
const float32_tN absValue = abs(value);
403405
const auto key = Key(absValue);
404406

405407
constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -413,14 +415,14 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
413415
{
414416
const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
415417

416-
const auto abs_fit = to_float32_t4(abs(fit));
418+
const auto abs_fit = to_vec_t4(abs(fit), 0.f);
417419
quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
418420

419421
insertIntoCache<CacheFormat>(key,quantized);
420422
}
421423
}
422424

423-
auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
425+
auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
424426
{
425427
hlsl::uint32_t4 retval;
426428
retval.x = mask.x ? val2.x : val1.x;
@@ -435,25 +437,26 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
435437

436438
// for a positive number, XORing with 0 keeps its value unchanged
437439
// for a negative number we XOR with all ones, which flips the bits, and then add one afterwards. Flipping the bits and then adding one turns a positive number into its negative counterpart
438-
auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
439-
restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
440+
auto restoredAsVec = quantized.getValue() ^ select(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
441+
restoredAsVec += (hlsl::uint32_t4(1u) & hlsl::uint32_t4(negativeMask));
440442

441443
return value_type_t<CacheFormat>(restoredAsVec);
442444
}
443445

444446
template<uint32_t dimensions, uint32_t quantizationBits>
445447
static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)
446448
{
449+
using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
447450
static_assert(dimensions>1u,"No point");
448451
static_assert(dimensions<=4u,"High Dimensions are Hard!");
449452

450453
const auto vectorForDots = hlsl::normalize(value);
451454

452455
//
453-
hlsl::vector<hlsl::float32_t, dimensions> fittingVector;
454-
hlsl::vector<hlsl::float32_t, dimensions> floorOffset = {};
456+
float32_tN fittingVector;
457+
float32_tN floorOffset = {};
455458
constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
456-
hlsl::vector<hlsl::float32_t, dimensions> corners[cornerCount] = {};
459+
float32_tN corners[cornerCount] = {};
457460
{
458461
uint32_t maxDirCompIndex = 0u;
459462
for (auto i=1u; i<dimensions; i++)
@@ -465,7 +468,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
465468
if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
466469
{
467470
_NBL_DEBUG_BREAK_IF(true);
468-
return hlsl::vector<hlsl::float32_t, dimensions>(0.f);
471+
return float32_tN(0.f);
469472
}
470473
fittingVector = value / maxDirectionComp;
471474
floorOffset[maxDirCompIndex] = 0.499f;
@@ -487,9 +490,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
487490
}
488491
}
489492

490-
hlsl::vector<hlsl::float32_t, dimensions> bestFit;
493+
float32_tN bestFit;
491494
float closestTo1 = -1.f;
492-
auto evaluateFit = [&](const hlsl::vector<hlsl::float32_t, dimensions>& newFit) -> void
495+
auto evaluateFit = [&](const float32_tN& newFit) -> void
493496
{
494497
auto newFitLen = length(newFit);
495498
const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
@@ -502,7 +505,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
502505

503506
constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
504507
const auto test = core::vectorSIMDf(cubeHalfSize);
505-
const hlsl::vector<hlsl::float32_t, dimensions> cubeHalfSizeND = hlsl::vector<hlsl::float32_t, dimensions>(cubeHalfSize);
508+
const float32_tN cubeHalfSizeND = float32_tN(cubeHalfSize);
506509
for (uint32_t n=cubeHalfSize; n>0u; n--)
507510
{
508511
//we'd use float addition in the interest of speed, to increment the loop

src/nbl/asset/utils/CGeometryCreator.cpp

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,32 @@
1313
namespace nbl::asset
1414
{
1515

16-
namespace
17-
{
18-
using snorm_normal_t = hlsl::vector<int8_t, 4>;
19-
constexpr int8_t snorm_one = std::numeric_limits<int8_t>::max();
20-
constexpr int8_t snorm_neg_one = std::numeric_limits<int8_t>::min();
21-
constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0);
22-
constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
23-
constexpr auto snorm_positive_y = hlsl::vector<int8_t, 4>(0, snorm_one, 0, 0);
24-
constexpr auto snorm_negative_y = hlsl::vector<int8_t, 4>(0, snorm_neg_one, 0, 0);
25-
constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
26-
constexpr auto snorm_negative_z = hlsl::vector<int8_t, 4>(0, 0, snorm_neg_one, 0);
27-
28-
constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
29-
30-
template <typename ElementT>
31-
requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
32-
constexpr E_FORMAT get_uv_format()
33-
{
34-
if constexpr(std::is_same_v<ElementT, uint8_t>)
35-
{
36-
return EF_R8G8_UNORM;
37-
} else
38-
{
39-
return EF_R16G16_UNORM;
40-
}
41-
}
16+
namespace
17+
{
18+
using snorm_normal_t = hlsl::vector<int8_t, 4>;
19+
constexpr int8_t snorm_one = std::numeric_limits<int8_t>::max();
20+
constexpr int8_t snorm_neg_one = std::numeric_limits<int8_t>::min();
21+
constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0);
22+
constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
23+
constexpr auto snorm_positive_y = hlsl::vector<int8_t, 4>(0, snorm_one, 0, 0);
24+
constexpr auto snorm_negative_y = hlsl::vector<int8_t, 4>(0, snorm_neg_one, 0, 0);
25+
constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
26+
constexpr auto snorm_negative_z = hlsl::vector<int8_t, 4>(0, 0, snorm_neg_one, 0);
27+
28+
constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
4229

30+
template <typename ElementT>
31+
requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
32+
constexpr E_FORMAT get_uv_format()
33+
{
34+
if constexpr(std::is_same_v<ElementT, uint8_t>)
35+
{
36+
return EF_R8G8_UNORM;
37+
} else
38+
{
39+
return EF_R16G16_UNORM;
40+
}
41+
}
4342
}
4443

4544
template <typename ElementT>
@@ -682,7 +681,7 @@ core::smart_refctd_ptr<ICPUGeometryCollection> CGeometryCreator::createArrow(
682681
geometries->push_back({
683682
.geometry = cylinder
684683
});
685-
const auto coneTransform = hlsl::math::linalg::rotation_mat(-1.5707963268f, hlsl::float32_t3(1.f, 0.f, 0.f));
684+
const auto coneTransform = hlsl::math::linalg::rotation_mat(hlsl::numbers::pi<hlsl::float32_t> * -0.5f, hlsl::float32_t3(1.f, 0.f, 0.f));
686685
geometries->push_back({
687686
.transform = hlsl::float32_t3x4(coneTransform),
688687
.geometry = cone

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3024,7 +3024,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
30243024
uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
30253025
if (geom.hasTransform())
30263026
{
3027-
size = core::alignUp(size, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT)+sizeof(hlsl::float32_t3x4);
3027+
size = core::alignUp(size, IAccelerationStructure::TransformDataMinAlignment)+sizeof(hlsl::float32_t3x4);
30283028
alignment = hlsl::max<uint16_t>(alignof(float),alignment);
30293029
}
30303030
uint16_t indexSize = 0;
@@ -5061,7 +5061,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50615061
uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
50625062
if (geom.hasTransform())
50635063
{
5064-
size = core::alignUp(size, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT)+sizeof(hlsl::float32_t3x4);
5064+
size = core::alignUp(size, IAccelerationStructure::TransformDataMinAlignment)+sizeof(hlsl::float32_t3x4);
50655065
alignment = hlsl::max<uint16_t>(alignof(float),alignment);
50665066
}
50675067
uint16_t indexSize = 0u;
@@ -5265,7 +5265,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
52655265
}
52665266
if (geom.hasTransform())
52675267
{
5268-
offset = core::alignUp(offset, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT);
5268+
offset = core::alignUp(offset, IAccelerationStructure::TransformDataMinAlignment);
52695269
outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
52705270
memcpyCallback.data = &geom.transform;
52715271
if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback))

0 commit comments

Comments
 (0)