32 primitive_indices.clear();
36 std::array<uint32_t, 6> type_counters = {0};
40 type_offsets[i] = type_counters[prim_type]++;
48 primitive_refs[i].prim_index =
static_cast<uint32_t
>(i);
49 primitive_refs[i].prim_type = prim_type;
50 primitive_refs[i].bounds = computePrimitiveAABB(
static_cast<uint32_t
>(i), prim_type);
54 BuildNode *root = recursiveBuild(primitive_refs, 0,
static_cast<uint32_t
>(geom.
primitive_count), 0);
61 std::vector<BVHNode> nodes;
62 nodes.reserve(allocated_nodes.size() * 2);
63 flattenTree(root, nodes);
67 primitive_refs.clear();
73 AABB BVHBuilder::computePrimitiveAABB(uint32_t prim_index, uint32_t prim_type)
const {
76 return computePatchAABB(prim_index);
78 return computeTriangleAABB(prim_index);
80 return computeDiskAABB(prim_index);
82 return computeTileAABB(prim_index);
84 return computeVoxelAABB(prim_index);
86 return computeBBoxAABB(prim_index);
88 return AABB{{0, 0, 0}, {0, 0, 0}};
100 result.
x = transform[0] * point.
x + transform[1] * point.
y + transform[2] * point.
z + transform[3];
101 result.
y = transform[4] * point.
x + transform[5] * point.
y + transform[6] * point.
z + transform[7];
102 result.
z = transform[8] * point.
x + transform[9] * point.
y + transform[10] * point.
z + transform[11];
106 AABB BVHBuilder::computePatchAABB(uint32_t prim_index)
const {
108 static const helios::vec3 canonical_quad[4] = {{-0.5f, -0.5f, 0.f}, {0.5f, -0.5f, 0.f}, {0.5f, 0.5f, 0.f}, {-0.5f, 0.5f, 0.f}};
111 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
113 for (uint32_t v = 0; v < 4; ++v) {
114 helios::vec3 world_vertex = transformPoint(transform, canonical_quad[v]);
116 bounds.min.x = std::min(bounds.min.x, world_vertex.
x);
117 bounds.min.y = std::min(bounds.min.y, world_vertex.
y);
118 bounds.min.z = std::min(bounds.min.z, world_vertex.
z);
119 bounds.max.x = std::max(bounds.max.x, world_vertex.
x);
120 bounds.max.y = std::max(bounds.max.y, world_vertex.
y);
121 bounds.max.z = std::max(bounds.max.z, world_vertex.
z);
127 AABB BVHBuilder::computeTriangleAABB(uint32_t prim_index)
const {
129 static const helios::vec3 canonical_tri[3] = {{0.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {1.f, 1.f, 0.f}};
132 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
134 for (uint32_t v = 0; v < 3; ++v) {
135 helios::vec3 world_vertex = transformPoint(transform, canonical_tri[v]);
137 bounds.min.x = std::min(bounds.min.x, world_vertex.
x);
138 bounds.min.y = std::min(bounds.min.y, world_vertex.
y);
139 bounds.min.z = std::min(bounds.min.z, world_vertex.
z);
140 bounds.max.x = std::max(bounds.max.x, world_vertex.
x);
141 bounds.max.y = std::max(bounds.max.y, world_vertex.
y);
142 bounds.max.z = std::max(bounds.max.z, world_vertex.
z);
148 AABB BVHBuilder::computeDiskAABB(uint32_t prim_index)
const {
150 uint32_t disk_index = type_offsets[prim_index];
155 float radius = geometry->
disk_radii[disk_index];
158 helios::vec3 world_center = transformPoint(transform, center);
162 bounds.min = world_center -
helios::vec3(radius, radius, radius);
163 bounds.max = world_center +
helios::vec3(radius, radius, radius);
168 AABB BVHBuilder::computeTileAABB(uint32_t prim_index)
const {
170 static const helios::vec3 canonical_quad[4] = {{-0.5f, -0.5f, 0.f}, {0.5f, -0.5f, 0.f}, {0.5f, 0.5f, 0.f}, {-0.5f, 0.5f, 0.f}};
173 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
175 for (uint32_t v = 0; v < 4; ++v) {
176 helios::vec3 world_vertex = transformPoint(transform, canonical_quad[v]);
178 bounds.min.x = std::min(bounds.min.x, world_vertex.
x);
179 bounds.min.y = std::min(bounds.min.y, world_vertex.
y);
180 bounds.min.z = std::min(bounds.min.z, world_vertex.
z);
181 bounds.max.x = std::max(bounds.max.x, world_vertex.
x);
182 bounds.max.y = std::max(bounds.max.y, world_vertex.
y);
183 bounds.max.z = std::max(bounds.max.z, world_vertex.
z);
189 AABB BVHBuilder::computeVoxelAABB(uint32_t prim_index)
const {
191 uint32_t voxel_index = type_offsets[prim_index];
195 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
197 for (uint32_t v = 0; v < 8; ++v) {
199 helios::vec3 world_vertex = transformPoint(transform, vertex);
201 bounds.min.x = std::min(bounds.min.x, world_vertex.
x);
202 bounds.min.y = std::min(bounds.min.y, world_vertex.
y);
203 bounds.min.z = std::min(bounds.min.z, world_vertex.
z);
204 bounds.max.x = std::max(bounds.max.x, world_vertex.
x);
205 bounds.max.y = std::max(bounds.max.y, world_vertex.
y);
206 bounds.max.z = std::max(bounds.max.z, world_vertex.
z);
212 AABB BVHBuilder::computeBBoxAABB(uint32_t prim_index)
const {
214 uint32_t bbox_index = type_offsets[prim_index];
218 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
220 for (uint32_t v = 0; v < 8; ++v) {
222 helios::vec3 world_vertex = transformPoint(transform, vertex);
224 bounds.min.x = std::min(bounds.min.x, world_vertex.
x);
225 bounds.min.y = std::min(bounds.min.y, world_vertex.
y);
226 bounds.min.z = std::min(bounds.min.z, world_vertex.
z);
227 bounds.max.x = std::max(bounds.max.x, world_vertex.
x);
228 bounds.max.y = std::max(bounds.max.y, world_vertex.
y);
229 bounds.max.z = std::max(bounds.max.z, world_vertex.
z);
235 BVHBuilder::BuildNode *BVHBuilder::recursiveBuild(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end, uint32_t depth) {
236 BuildNode *node =
new BuildNode();
237 allocated_nodes.push_back(node);
240 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
241 for (uint32_t i = start; i < end; ++i) {
242 bounds.expand(refs[i].bounds);
244 node->bounds = bounds;
246 uint32_t prim_count = end - start;
249 if (prim_count <= MAX_PRIMS_PER_LEAF || depth >= MAX_DEPTH) {
250 return createLeaf(refs, start, end);
255 if (!partitionSAH(refs, start, end, bounds, mid, axis)) {
257 return createLeaf(refs, start, end);
260 node->split_axis = axis;
263 node->left = recursiveBuild(refs, start, mid, depth + 1);
264 node->right = recursiveBuild(refs, mid, end, depth + 1);
269 bool BVHBuilder::partitionSAH(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end,
const AABB &bounds, uint32_t &mid, uint32_t &axis) {
270 uint32_t prim_count = end - start;
273 float best_cost = std::numeric_limits<float>::infinity();
274 uint32_t best_axis = 0;
275 uint32_t best_split = 0;
277 for (uint32_t test_axis = 0; test_axis < 3; ++test_axis) {
280 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
285 float axis_min = (test_axis == 0) ? bounds.min.x : (test_axis == 1) ? bounds.
min.y : bounds.
min.z;
286 float axis_max = (test_axis == 0) ? bounds.max.x : (test_axis == 1) ? bounds.
max.y : bounds.
max.z;
287 float axis_extent = axis_max - axis_min;
289 if (axis_extent < 1e-6f)
293 for (uint32_t i = start; i < end; ++i) {
295 float centroid_axis = (test_axis == 0) ? centroid.
x : (test_axis == 1) ? centroid.y : centroid.z;
296 int bin_idx =
static_cast<int>(NUM_BINS * (centroid_axis - axis_min) / axis_extent);
297 bin_idx = std::min(bin_idx,
static_cast<int>(NUM_BINS - 1));
299 bins[bin_idx].count++;
300 bins[bin_idx].bounds.expand(refs[i].bounds);
304 AABB left_prefix_bounds[NUM_BINS];
305 AABB right_prefix_bounds[NUM_BINS];
306 uint32_t left_prefix_count[NUM_BINS];
307 uint32_t right_prefix_count[NUM_BINS];
311 AABB running{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
313 for (uint32_t i = 0; i < NUM_BINS - 1; ++i) {
314 running.expand(bins[i].bounds);
315 count += bins[i].count;
316 left_prefix_bounds[i] = running;
317 left_prefix_count[i] = count;
323 AABB running{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
325 for (
int i = NUM_BINS - 1; i >= 1; --i) {
326 running.expand(bins[i].bounds);
327 count += bins[i].count;
328 right_prefix_bounds[i - 1] = running;
329 right_prefix_count[i - 1] = count;
334 float parent_sa = bounds.surfaceArea();
335 for (uint32_t split_idx = 0; split_idx < NUM_BINS - 1; ++split_idx) {
336 if (left_prefix_count[split_idx] == 0 || right_prefix_count[split_idx] == 0)
339 float cost = 1.0f + (left_prefix_bounds[split_idx].surfaceArea() * left_prefix_count[split_idx] + right_prefix_bounds[split_idx].surfaceArea() * right_prefix_count[split_idx]) / parent_sa;
341 if (cost < best_cost) {
343 best_axis = test_axis;
344 best_split = split_idx + 1;
350 float leaf_cost =
static_cast<float>(prim_count);
351 if (best_cost >= leaf_cost) {
357 float axis_min = (axis == 0) ? bounds.min.x : (axis == 1) ? bounds.
min.y : bounds.
min.z;
358 float axis_max = (axis == 0) ? bounds.max.x : (axis == 1) ? bounds.
max.y : bounds.
max.z;
359 float axis_extent = axis_max - axis_min;
360 float split_pos = axis_min + (best_split /
static_cast<float>(NUM_BINS)) * axis_extent;
362 auto partition_pred = [&](
const PrimitiveRef &ref) {
364 float centroid_axis = (axis == 0) ? centroid.
x : (axis == 1) ? centroid.y : centroid.z;
365 return centroid_axis < split_pos;
368 PrimitiveRef *mid_ptr = std::partition(&refs[start], &refs[end], partition_pred);
369 mid =
static_cast<uint32_t
>(mid_ptr - &refs[0]);
372 if (mid == start || mid == end) {
373 mid = (start + end) / 2;
379 BVHBuilder::BuildNode *BVHBuilder::createLeaf(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end) {
380 BuildNode *node =
new BuildNode();
381 allocated_nodes.push_back(node);
384 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
385 for (uint32_t i = start; i < end; ++i) {
386 bounds.expand(refs[i].bounds);
388 node->bounds = bounds;
391 node->first_prim_offset =
static_cast<uint32_t
>(primitive_indices.size());
392 node->prim_count = end - start;
393 node->prim_type = refs[start].prim_type;
395 for (uint32_t i = start; i < end; ++i) {
396 primitive_indices.push_back(refs[i].prim_index);
402 uint32_t BVHBuilder::flattenTree(BuildNode *node, std::vector<BVHNode> &nodes) {
408 std::queue<BuildNode *> bfs_queue;
409 std::vector<BuildNode *> bfs_order;
410 bfs_queue.push(node);
411 while (!bfs_queue.empty()) {
412 BuildNode *current = bfs_queue.front();
414 bfs_order.push_back(current);
415 if (current->prim_count == 0) {
417 bfs_queue.push(current->left);
418 bfs_queue.push(current->right);
423 std::unordered_map<BuildNode *, uint32_t> node_to_index;
424 for (uint32_t i = 0; i < bfs_order.size(); ++i) {
425 node_to_index[bfs_order[i]] = i;
429 nodes.resize(bfs_order.size());
430 for (uint32_t i = 0; i < bfs_order.size(); ++i) {
431 BuildNode *current = bfs_order[i];
434 flat_node.aabb_min[0] = current->bounds.min.x;
435 flat_node.aabb_min[1] = current->bounds.min.y;
436 flat_node.aabb_min[2] = current->bounds.min.z;
437 flat_node.aabb_max[0] = current->bounds.max.x;
438 flat_node.aabb_max[1] = current->bounds.max.y;
439 flat_node.aabb_max[2] = current->bounds.max.z;
441 if (current->prim_count > 0) {
443 flat_node.prim_count = current->prim_count;
444 flat_node.prim_type = current->prim_type;
445 flat_node.first_prim = current->first_prim_offset;
446 flat_node.right_child = UINT32_MAX;
449 flat_node.prim_count = 0;
450 flat_node.split_axis = current->split_axis;
451 flat_node.left_child = node_to_index[current->left];
452 flat_node.right_child = node_to_index[current->right];
455 nodes[i] = flat_node;
461 void BVHBuilder::cleanup() {
462 for (BuildNode *node: allocated_nodes) {
465 allocated_nodes.clear();
471 if (bvh2_nodes.empty()) {
476 BuildNode *root = reconstructTree(bvh2_nodes, 0);
479 BVH8Node *root8 = collapseToBVH8(root);
482 std::vector<CWBVH_Node> cwbvh_nodes;
483 flattenBVH8(root8, cwbvh_nodes);
486 for (BVH8Node *node: allocated_bvh8_nodes) {
489 allocated_bvh8_nodes.clear();
494 BVHBuilder::BuildNode *BVHBuilder::reconstructTree(
const std::vector<BVHNode> &bvh2_nodes, uint32_t node_idx) {
495 if (node_idx >= bvh2_nodes.size()) {
499 const BVHNode &flat_node = bvh2_nodes[node_idx];
500 BuildNode *node =
new BuildNode();
501 allocated_nodes.push_back(node);
504 node->bounds.min = helios::make_vec3(flat_node.aabb_min[0], flat_node.aabb_min[1], flat_node.aabb_min[2]);
505 node->bounds.max = helios::make_vec3(flat_node.aabb_max[0], flat_node.aabb_max[1], flat_node.aabb_max[2]);
507 if (flat_node.right_child == UINT32_MAX) {
509 node->prim_count = flat_node.prim_count;
510 node->prim_type = flat_node.prim_type;
511 node->first_prim_offset = flat_node.first_prim;
512 node->left =
nullptr;
513 node->right =
nullptr;
516 node->prim_count = 0;
517 node->split_axis = flat_node.split_axis;
518 node->left = reconstructTree(bvh2_nodes, flat_node.left_child);
519 node->right = reconstructTree(bvh2_nodes, flat_node.right_child);
525 BVHBuilder::BVH8Node *BVHBuilder::collapseToBVH8(BuildNode *bvh2_node,
int depth) {
530 auto countPrimitivesInSubtree = [](BuildNode *node,
auto &self) -> uint32_t {
533 if (node->prim_count > 0)
534 return node->prim_count;
535 return self(node->left, self) + self(node->right, self);
537 BVH8Node *node8 =
new BVH8Node();
538 allocated_bvh8_nodes.push_back(node8);
540 node8->aabb = bvh2_node->bounds;
543 if (bvh2_node->prim_count > 0) {
544 node8->children[0] = bvh2_node;
545 node8->is_leaf[0] =
true;
546 node8->first_prim[0] = bvh2_node->first_prim_offset;
547 node8->prim_count[0] = bvh2_node->prim_count;
548 node8->prim_type[0] = bvh2_node->prim_type;
553 std::vector<BuildNode *> candidates = {bvh2_node};
555 while (candidates.size() < 8) {
558 uint32_t max_prim_count = 0;
560 for (
size_t i = 0; i < candidates.size(); i++) {
561 if (candidates[i]->prim_count > 0)
565 uint32_t subtree_prims = countPrimitivesInSubtree(candidates[i], countPrimitivesInSubtree);
567 if (subtree_prims > max_prim_count) {
568 max_prim_count = subtree_prims;
569 best_idx =
static_cast<int>(i);
577 BuildNode *expand = candidates[best_idx];
578 candidates.erase(candidates.begin() + best_idx);
580 candidates.push_back(expand->left);
582 candidates.push_back(expand->right);
586 assignChildrenToOctants(node8, candidates);
591 void BVHBuilder::assignChildrenToOctants(BVH8Node *node8, std::vector<BuildNode *> &children) {
607 for (
int slot = 0; slot < 8; slot++) {
608 if (children.empty())
612 float best_score = 1e30f;
614 for (
size_t c = 0; c < children.size(); c++) {
615 helios::vec3 child_center = children[c]->bounds.centroid();
619 float score = -(offset.
x * octants[slot].
x + offset.
y * octants[slot].
y + offset.
z * octants[slot].
z);
621 if (score < best_score) {
623 best =
static_cast<int>(c);
628 BuildNode *child = children[best];
629 node8->children[slot] = child;
630 node8->is_leaf[slot] = (child->prim_count > 0);
632 if (node8->is_leaf[slot]) {
633 node8->first_prim[slot] = child->first_prim_offset;
634 node8->prim_count[slot] = child->prim_count;
635 node8->prim_type[slot] = child->prim_type;
638 children.erase(children.begin() + best);
643 void BVHBuilder::compressNode(
const BVH8Node *src, CWBVH_Node &dst) {
645 float extent_x = src->aabb.max.
x - src->aabb.min.x;
646 float extent_y = src->aabb.max.y - src->aabb.min.y;
647 float extent_z = src->aabb.max.z - src->aabb.min.z;
649 dst.p[0] = src->aabb.min.x;
650 dst.p[1] = src->aabb.min.y;
651 dst.p[2] = src->aabb.min.z;
654 int exp_x = (extent_x > 1e-6f) ?
static_cast<int>(std::ceil(std::log2(extent_x / 255.0f))) : 0;
655 int exp_y = (extent_y > 1e-6f) ?
static_cast<int>(std::ceil(std::log2(extent_y / 255.0f))) : 0;
656 int exp_z = (extent_z > 1e-6f) ?
static_cast<int>(std::ceil(std::log2(extent_z / 255.0f))) : 0;
659 uint8_t biased_x =
static_cast<uint8_t
>(std::max(0, std::min(255, 127 + exp_x)));
660 uint8_t biased_y =
static_cast<uint8_t
>(std::max(0, std::min(255, 127 + exp_y)));
661 uint8_t biased_z =
static_cast<uint8_t
>(std::max(0, std::min(255, 127 + exp_z)));
662 dst.e_packed = biased_x | (biased_y << 8) | (biased_z << 16);
665 float scale_x = std::pow(2.0f,
static_cast<float>(exp_x));
666 float scale_y = std::pow(2.0f,
static_cast<float>(exp_y));
667 float scale_z = std::pow(2.0f,
static_cast<float>(exp_z));
670 for (
int child = 0; child < 8; child++) {
671 if (!src->children[child]) {
673 dst.qmin_x[child] = 255;
674 dst.qmin_y[child] = 255;
675 dst.qmin_z[child] = 255;
676 dst.qmax_x[child] = 0;
677 dst.qmax_y[child] = 0;
678 dst.qmax_z[child] = 0;
682 BuildNode *child_node = src->children[child];
685 float min_qx = (child_node->bounds.min.x - dst.p[0]) / scale_x;
686 float max_qx = (child_node->bounds.max.x - dst.p[0]) / scale_x;
687 int qmin_x = std::max(0, std::min(255,
static_cast<int>(std::floor(min_qx - 0.001f))));
688 int qmax_x = std::max(0, std::min(255,
static_cast<int>(std::ceil(max_qx + 0.001f))));
689 dst.qmin_x[child] =
static_cast<uint8_t
>(qmin_x);
690 dst.qmax_x[child] =
static_cast<uint8_t
>(qmax_x);
693 float min_qy = (child_node->bounds.min.y - dst.p[1]) / scale_y;
694 float max_qy = (child_node->bounds.max.y - dst.p[1]) / scale_y;
695 int qmin_y = std::max(0, std::min(255,
static_cast<int>(std::floor(min_qy - 0.001f))));
696 int qmax_y = std::max(0, std::min(255,
static_cast<int>(std::ceil(max_qy + 0.001f))));
697 dst.qmin_y[child] =
static_cast<uint8_t
>(qmin_y);
698 dst.qmax_y[child] =
static_cast<uint8_t
>(qmax_y);
701 float min_qz = (child_node->bounds.min.z - dst.p[2]) / scale_z;
702 float max_qz = (child_node->bounds.max.z - dst.p[2]) / scale_z;
703 int qmin_z = std::max(0, std::min(255,
static_cast<int>(std::floor(min_qz - 0.001f))));
704 int qmax_z = std::max(0, std::min(255,
static_cast<int>(std::ceil(max_qz + 0.001f))));
705 dst.qmin_z[child] =
static_cast<uint8_t
>(qmin_z);
706 dst.qmax_z[child] =
static_cast<uint8_t
>(qmax_z);
709 if (!src->is_leaf[child]) {
710 dst.imask |= (1 << child);
714 dst.child_first_prim[child] = src->first_prim[child];
715 dst.child_prim_count[child] =
static_cast<uint8_t
>(src->prim_count[child]);
716 dst.child_prim_type[child] =
static_cast<uint8_t
>(src->prim_type[child]);
720 uint32_t BVHBuilder::flattenBVH8(BVH8Node *root, std::vector<CWBVH_Node> &cwbvh_nodes) {
727 uint32_t array_index;
729 std::queue<QueueEntry> bfs_queue;
732 CWBVH_Node root_flat = {};
733 compressNode(root, root_flat);
734 cwbvh_nodes.push_back(root_flat);
735 bfs_queue.push({root, 0});
737 while (!bfs_queue.empty()) {
738 auto [current, my_idx] = bfs_queue.front();
742 std::vector<std::pair<int, BVH8Node *>> internal_children;
743 for (
int i = 0; i < 8; i++) {
744 if (current->children[i] && !current->is_leaf[i]) {
745 BVH8Node *child8 = collapseToBVH8(current->children[i]);
746 internal_children.push_back({i, child8});
749 if (internal_children.empty())
753 uint32_t child_base =
static_cast<uint32_t
>(cwbvh_nodes.size());
754 cwbvh_nodes[my_idx].base_index_child = child_base;
756 for (
auto &[slot, child8]: internal_children) {
757 CWBVH_Node child_flat = {};
758 compressNode(child8, child_flat);
759 uint32_t child_idx =
static_cast<uint32_t
>(cwbvh_nodes.size());
760 cwbvh_nodes.push_back(child_flat);
761 bfs_queue.push({child8, child_idx});