1.3.72
 
Loading...
Searching...
No Matches
BVHBuilder.cpp
Go to the documentation of this file.
1
16#include "BVHBuilder.h"
17#include <algorithm>
18#include <limits>
19#include <cmath>
20#include <queue>
21
22namespace helios {
23
24 std::vector<BVHNode> BVHBuilder::build(const RayTracingGeometry &geom) {
25 geometry = &geom;
26
27 if (geom.primitive_count == 0) {
28 return {}; // Empty BVH
29 }
30
31 // Clear state from previous build (prevents accumulation across multiple updateGeometry calls)
32 primitive_indices.clear();
33
34 // Step 1: Pre-compute type-specific offset tables (O(N) instead of O(N²))
35 type_offsets.resize(geom.primitive_count);
36 std::array<uint32_t, 6> type_counters = {0}; // 6 primitive types
37
38 for (size_t i = 0; i < geom.primitive_count; ++i) {
39 uint32_t prim_type = geom.primitive_types[i];
40 type_offsets[i] = type_counters[prim_type]++;
41 }
42
43 // Step 2: Compute AABBs for all primitives
44 primitive_refs.resize(geom.primitive_count);
45
46 for (size_t i = 0; i < geom.primitive_count; ++i) {
47 uint32_t prim_type = geom.primitive_types[i];
48 primitive_refs[i].prim_index = static_cast<uint32_t>(i);
49 primitive_refs[i].prim_type = prim_type;
50 primitive_refs[i].bounds = computePrimitiveAABB(static_cast<uint32_t>(i), prim_type);
51 }
52
53 // Step 3: Build BVH tree (recursive SAH)
54 BuildNode *root = recursiveBuild(primitive_refs, 0, static_cast<uint32_t>(geom.primitive_count), 0);
55
56 if (!root) {
57 return {}; // Failed to build
58 }
59
60 // Step 4: Flatten tree to contiguous array
61 std::vector<BVHNode> nodes;
62 nodes.reserve(allocated_nodes.size() * 2); // Estimate size
63 flattenTree(root, nodes);
64
65 // Cleanup
66 cleanup();
67 primitive_refs.clear();
68 type_offsets.clear();
69
70 return nodes;
71 }
72
73 AABB BVHBuilder::computePrimitiveAABB(uint32_t prim_index, uint32_t prim_type) const {
74 switch (prim_type) {
75 case 0:
76 return computePatchAABB(prim_index);
77 case 1:
78 return computeTriangleAABB(prim_index);
79 case 2:
80 return computeDiskAABB(prim_index);
81 case 3:
82 return computeTileAABB(prim_index);
83 case 4:
84 return computeVoxelAABB(prim_index);
85 case 5:
86 return computeBBoxAABB(prim_index);
87 default:
88 return AABB{{0, 0, 0}, {0, 0, 0}};
89 }
90 }
91
92 helios::vec3 BVHBuilder::transformPoint(const float *transform, const helios::vec3 &point) const {
93 // 4x4 row-major transform matrix (16 floats)
94 // [m0 m1 m2 m3 ] [x]
95 // [m4 m5 m6 m7 ] * [y]
96 // [m8 m9 m10 m11] [z]
97 // [m12 m13 m14 m15] [1]
98
99 helios::vec3 result;
100 result.x = transform[0] * point.x + transform[1] * point.y + transform[2] * point.z + transform[3];
101 result.y = transform[4] * point.x + transform[5] * point.y + transform[6] * point.z + transform[7];
102 result.z = transform[8] * point.x + transform[9] * point.y + transform[10] * point.z + transform[11];
103 return result;
104 }
105
106 AABB BVHBuilder::computePatchAABB(uint32_t prim_index) const {
107 // Canonical patch vertices in local space — transform to world space for AABB
108 static const helios::vec3 canonical_quad[4] = {{-0.5f, -0.5f, 0.f}, {0.5f, -0.5f, 0.f}, {0.5f, 0.5f, 0.f}, {-0.5f, 0.5f, 0.f}};
109
110 const float *transform = &geometry->transform_matrices[prim_index * 16];
111 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
112
113 for (uint32_t v = 0; v < 4; ++v) {
114 helios::vec3 world_vertex = transformPoint(transform, canonical_quad[v]);
115
116 bounds.min.x = std::min(bounds.min.x, world_vertex.x);
117 bounds.min.y = std::min(bounds.min.y, world_vertex.y);
118 bounds.min.z = std::min(bounds.min.z, world_vertex.z);
119 bounds.max.x = std::max(bounds.max.x, world_vertex.x);
120 bounds.max.y = std::max(bounds.max.y, world_vertex.y);
121 bounds.max.z = std::max(bounds.max.z, world_vertex.z);
122 }
123
124 return bounds;
125 }
126
127 AABB BVHBuilder::computeTriangleAABB(uint32_t prim_index) const {
128 // Canonical triangle vertices in local space — transform to world space for AABB
129 static const helios::vec3 canonical_tri[3] = {{0.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {1.f, 1.f, 0.f}};
130
131 const float *transform = &geometry->transform_matrices[prim_index * 16];
132 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
133
134 for (uint32_t v = 0; v < 3; ++v) {
135 helios::vec3 world_vertex = transformPoint(transform, canonical_tri[v]);
136
137 bounds.min.x = std::min(bounds.min.x, world_vertex.x);
138 bounds.min.y = std::min(bounds.min.y, world_vertex.y);
139 bounds.min.z = std::min(bounds.min.z, world_vertex.z);
140 bounds.max.x = std::max(bounds.max.x, world_vertex.x);
141 bounds.max.y = std::max(bounds.max.y, world_vertex.y);
142 bounds.max.z = std::max(bounds.max.z, world_vertex.z);
143 }
144
145 return bounds;
146 }
147
148 AABB BVHBuilder::computeDiskAABB(uint32_t prim_index) const {
149 // O(1) lookup using pre-computed offset table
150 uint32_t disk_index = type_offsets[prim_index];
151
152 // Disks: center + radius + normal (already in world space from disk_centers)
153 const float *transform = &geometry->transform_matrices[prim_index * 16];
154 helios::vec3 center = geometry->disk_centers[disk_index];
155 float radius = geometry->disk_radii[disk_index];
156
157 // Apply transform to center
158 helios::vec3 world_center = transformPoint(transform, center);
159
160 // Conservative AABB: cube around center with side length = 2*radius
161 AABB bounds;
162 bounds.min = world_center - helios::vec3(radius, radius, radius);
163 bounds.max = world_center + helios::vec3(radius, radius, radius);
164
165 return bounds;
166 }
167
168 AABB BVHBuilder::computeTileAABB(uint32_t prim_index) const {
169 // Canonical tile vertices in local space — transform to world space for AABB
170 static const helios::vec3 canonical_quad[4] = {{-0.5f, -0.5f, 0.f}, {0.5f, -0.5f, 0.f}, {0.5f, 0.5f, 0.f}, {-0.5f, 0.5f, 0.f}};
171
172 const float *transform = &geometry->transform_matrices[prim_index * 16];
173 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
174
175 for (uint32_t v = 0; v < 4; ++v) {
176 helios::vec3 world_vertex = transformPoint(transform, canonical_quad[v]);
177
178 bounds.min.x = std::min(bounds.min.x, world_vertex.x);
179 bounds.min.y = std::min(bounds.min.y, world_vertex.y);
180 bounds.min.z = std::min(bounds.min.z, world_vertex.z);
181 bounds.max.x = std::max(bounds.max.x, world_vertex.x);
182 bounds.max.y = std::max(bounds.max.y, world_vertex.y);
183 bounds.max.z = std::max(bounds.max.z, world_vertex.z);
184 }
185
186 return bounds;
187 }
188
189 AABB BVHBuilder::computeVoxelAABB(uint32_t prim_index) const {
190 // O(1) lookup using pre-computed offset table
191 uint32_t voxel_index = type_offsets[prim_index];
192
193 // Voxels have 8 vertices
194 const float *transform = &geometry->transform_matrices[prim_index * 16];
195 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
196
197 for (uint32_t v = 0; v < 8; ++v) {
198 helios::vec3 vertex = geometry->voxels.vertices[voxel_index * 8 + v];
199 helios::vec3 world_vertex = transformPoint(transform, vertex);
200
201 bounds.min.x = std::min(bounds.min.x, world_vertex.x);
202 bounds.min.y = std::min(bounds.min.y, world_vertex.y);
203 bounds.min.z = std::min(bounds.min.z, world_vertex.z);
204 bounds.max.x = std::max(bounds.max.x, world_vertex.x);
205 bounds.max.y = std::max(bounds.max.y, world_vertex.y);
206 bounds.max.z = std::max(bounds.max.z, world_vertex.z);
207 }
208
209 return bounds;
210 }
211
212 AABB BVHBuilder::computeBBoxAABB(uint32_t prim_index) const {
213 // O(1) lookup using pre-computed offset table
214 uint32_t bbox_index = type_offsets[prim_index];
215
216 // BBoxes have 8 vertices
217 const float *transform = &geometry->transform_matrices[prim_index * 16];
218 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
219
220 for (uint32_t v = 0; v < 8; ++v) {
221 helios::vec3 vertex = geometry->bboxes.vertices[bbox_index * 8 + v];
222 helios::vec3 world_vertex = transformPoint(transform, vertex);
223
224 bounds.min.x = std::min(bounds.min.x, world_vertex.x);
225 bounds.min.y = std::min(bounds.min.y, world_vertex.y);
226 bounds.min.z = std::min(bounds.min.z, world_vertex.z);
227 bounds.max.x = std::max(bounds.max.x, world_vertex.x);
228 bounds.max.y = std::max(bounds.max.y, world_vertex.y);
229 bounds.max.z = std::max(bounds.max.z, world_vertex.z);
230 }
231
232 return bounds;
233 }
234
235 BVHBuilder::BuildNode *BVHBuilder::recursiveBuild(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end, uint32_t depth) {
236 BuildNode *node = new BuildNode();
237 allocated_nodes.push_back(node);
238
239 // Compute bounds of all primitives
240 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
241 for (uint32_t i = start; i < end; ++i) {
242 bounds.expand(refs[i].bounds);
243 }
244 node->bounds = bounds;
245
246 uint32_t prim_count = end - start;
247
248 // Leaf criteria: few primitives or max depth reached
249 if (prim_count <= MAX_PRIMS_PER_LEAF || depth >= MAX_DEPTH) {
250 return createLeaf(refs, start, end);
251 }
252
253 // Try to partition using SAH
254 uint32_t mid, axis;
255 if (!partitionSAH(refs, start, end, bounds, mid, axis)) {
256 // SAH failed to find good split - create leaf
257 return createLeaf(refs, start, end);
258 }
259
260 node->split_axis = axis;
261
262 // Recursively build children
263 node->left = recursiveBuild(refs, start, mid, depth + 1);
264 node->right = recursiveBuild(refs, mid, end, depth + 1);
265
266 return node;
267 }
268
269 bool BVHBuilder::partitionSAH(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end, const AABB &bounds, uint32_t &mid, uint32_t &axis) {
270 uint32_t prim_count = end - start;
271
272 // Find best split axis and position using binned SAH
273 float best_cost = std::numeric_limits<float>::infinity();
274 uint32_t best_axis = 0;
275 uint32_t best_split = 0;
276
277 for (uint32_t test_axis = 0; test_axis < 3; ++test_axis) {
278 // Bin primitives along this axis
279 struct Bin {
280 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
281 uint32_t count = 0;
282 };
283 Bin bins[NUM_BINS];
284
285 float axis_min = (test_axis == 0) ? bounds.min.x : (test_axis == 1) ? bounds.min.y : bounds.min.z;
286 float axis_max = (test_axis == 0) ? bounds.max.x : (test_axis == 1) ? bounds.max.y : bounds.max.z;
287 float axis_extent = axis_max - axis_min;
288
289 if (axis_extent < 1e-6f)
290 continue; // Degenerate axis
291
292 // Assign primitives to bins
293 for (uint32_t i = start; i < end; ++i) {
294 helios::vec3 centroid = refs[i].bounds.centroid();
295 float centroid_axis = (test_axis == 0) ? centroid.x : (test_axis == 1) ? centroid.y : centroid.z;
296 int bin_idx = static_cast<int>(NUM_BINS * (centroid_axis - axis_min) / axis_extent);
297 bin_idx = std::min(bin_idx, static_cast<int>(NUM_BINS - 1));
298
299 bins[bin_idx].count++;
300 bins[bin_idx].bounds.expand(refs[i].bounds);
301 }
302
303 // Incremental SAH cost computation using prefix sums (O(NUM_BINS) instead of O(NUM_BINS^2))
304 AABB left_prefix_bounds[NUM_BINS];
305 AABB right_prefix_bounds[NUM_BINS];
306 uint32_t left_prefix_count[NUM_BINS];
307 uint32_t right_prefix_count[NUM_BINS];
308
309 // Forward sweep: cumulative left bounds and counts
310 {
311 AABB running{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
312 uint32_t count = 0;
313 for (uint32_t i = 0; i < NUM_BINS - 1; ++i) {
314 running.expand(bins[i].bounds);
315 count += bins[i].count;
316 left_prefix_bounds[i] = running;
317 left_prefix_count[i] = count;
318 }
319 }
320
321 // Backward sweep: cumulative right bounds and counts
322 {
323 AABB running{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
324 uint32_t count = 0;
325 for (int i = NUM_BINS - 1; i >= 1; --i) {
326 running.expand(bins[i].bounds);
327 count += bins[i].count;
328 right_prefix_bounds[i - 1] = running;
329 right_prefix_count[i - 1] = count;
330 }
331 }
332
333 // Evaluate SAH cost at each split position in a single pass
334 float parent_sa = bounds.surfaceArea();
335 for (uint32_t split_idx = 0; split_idx < NUM_BINS - 1; ++split_idx) {
336 if (left_prefix_count[split_idx] == 0 || right_prefix_count[split_idx] == 0)
337 continue;
338
339 float cost = 1.0f + (left_prefix_bounds[split_idx].surfaceArea() * left_prefix_count[split_idx] + right_prefix_bounds[split_idx].surfaceArea() * right_prefix_count[split_idx]) / parent_sa;
340
341 if (cost < best_cost) {
342 best_cost = cost;
343 best_axis = test_axis;
344 best_split = split_idx + 1;
345 }
346 }
347 }
348
349 // Check if SAH split is better than making a leaf
350 float leaf_cost = static_cast<float>(prim_count);
351 if (best_cost >= leaf_cost) {
352 return false; // Leaf is better
353 }
354
355 // Partition primitives based on best split
356 axis = best_axis;
357 float axis_min = (axis == 0) ? bounds.min.x : (axis == 1) ? bounds.min.y : bounds.min.z;
358 float axis_max = (axis == 0) ? bounds.max.x : (axis == 1) ? bounds.max.y : bounds.max.z;
359 float axis_extent = axis_max - axis_min;
360 float split_pos = axis_min + (best_split / static_cast<float>(NUM_BINS)) * axis_extent;
361
362 auto partition_pred = [&](const PrimitiveRef &ref) {
363 helios::vec3 centroid = ref.bounds.centroid();
364 float centroid_axis = (axis == 0) ? centroid.x : (axis == 1) ? centroid.y : centroid.z;
365 return centroid_axis < split_pos;
366 };
367
368 PrimitiveRef *mid_ptr = std::partition(&refs[start], &refs[end], partition_pred);
369 mid = static_cast<uint32_t>(mid_ptr - &refs[0]);
370
371 // Fallback if partition failed
372 if (mid == start || mid == end) {
373 mid = (start + end) / 2;
374 }
375
376 return true;
377 }
378
379 BVHBuilder::BuildNode *BVHBuilder::createLeaf(std::vector<PrimitiveRef> &refs, uint32_t start, uint32_t end) {
380 BuildNode *node = new BuildNode();
381 allocated_nodes.push_back(node);
382
383 // Compute bounds
384 AABB bounds{{1e30f, 1e30f, 1e30f}, {-1e30f, -1e30f, -1e30f}};
385 for (uint32_t i = start; i < end; ++i) {
386 bounds.expand(refs[i].bounds);
387 }
388 node->bounds = bounds;
389
390 // Store primitive indices
391 node->first_prim_offset = static_cast<uint32_t>(primitive_indices.size());
392 node->prim_count = end - start;
393 node->prim_type = refs[start].prim_type; // Assume homogeneous leaf (all same type)
394
395 for (uint32_t i = start; i < end; ++i) {
396 primitive_indices.push_back(refs[i].prim_index);
397 }
398
399 return node;
400 }
401
402 uint32_t BVHBuilder::flattenTree(BuildNode *node, std::vector<BVHNode> &nodes) {
403 // Breadth-first layout: nodes are stored level by level so that upper-level
404 // nodes (visited by all rays) are contiguous at the start of the buffer and
405 // remain cache-resident during traversal.
406
407 // First pass: BFS to assign indices and collect nodes in level order
408 std::queue<BuildNode *> bfs_queue;
409 std::vector<BuildNode *> bfs_order;
410 bfs_queue.push(node);
411 while (!bfs_queue.empty()) {
412 BuildNode *current = bfs_queue.front();
413 bfs_queue.pop();
414 bfs_order.push_back(current);
415 if (current->prim_count == 0) {
416 // Internal node: enqueue children (left then right)
417 bfs_queue.push(current->left);
418 bfs_queue.push(current->right);
419 }
420 }
421
422 // Build a map from BuildNode pointer to its BFS index
423 std::unordered_map<BuildNode *, uint32_t> node_to_index;
424 for (uint32_t i = 0; i < bfs_order.size(); ++i) {
425 node_to_index[bfs_order[i]] = i;
426 }
427
428 // Second pass: write BVHNode entries in BFS order with correct child indices
429 nodes.resize(bfs_order.size());
430 for (uint32_t i = 0; i < bfs_order.size(); ++i) {
431 BuildNode *current = bfs_order[i];
432 BVHNode flat_node{};
433
434 flat_node.aabb_min[0] = current->bounds.min.x;
435 flat_node.aabb_min[1] = current->bounds.min.y;
436 flat_node.aabb_min[2] = current->bounds.min.z;
437 flat_node.aabb_max[0] = current->bounds.max.x;
438 flat_node.aabb_max[1] = current->bounds.max.y;
439 flat_node.aabb_max[2] = current->bounds.max.z;
440
441 if (current->prim_count > 0) {
442 // Leaf node
443 flat_node.prim_count = current->prim_count;
444 flat_node.prim_type = current->prim_type;
445 flat_node.first_prim = current->first_prim_offset;
446 flat_node.right_child = UINT32_MAX; // Mark as leaf
447 } else {
448 // Internal node
449 flat_node.prim_count = 0;
450 flat_node.split_axis = current->split_axis;
451 flat_node.left_child = node_to_index[current->left];
452 flat_node.right_child = node_to_index[current->right];
453 }
454
455 nodes[i] = flat_node;
456 }
457
458 return 0; // Root is always at index 0
459 }
460
461 void BVHBuilder::cleanup() {
462 for (BuildNode *node: allocated_nodes) {
463 delete node;
464 }
465 allocated_nodes.clear();
466 }
467
468 // ========== CWBVH Conversion Implementation ==========
469
470 std::vector<CWBVH_Node> BVHBuilder::convertToCWBVH(const std::vector<BVHNode> &bvh2_nodes) {
471 if (bvh2_nodes.empty()) {
472 return {};
473 }
474
475 // Step 1: Reconstruct tree from flat BVH2
476 BuildNode *root = reconstructTree(bvh2_nodes, 0);
477
478 // Step 2: Collapse to BVH8
479 BVH8Node *root8 = collapseToBVH8(root);
480
481 // Step 3: Flatten with compression
482 std::vector<CWBVH_Node> cwbvh_nodes;
483 flattenBVH8(root8, cwbvh_nodes);
484
485 // Cleanup
486 for (BVH8Node *node: allocated_bvh8_nodes) {
487 delete node;
488 }
489 allocated_bvh8_nodes.clear();
490
491 return cwbvh_nodes;
492 }
493
494 BVHBuilder::BuildNode *BVHBuilder::reconstructTree(const std::vector<BVHNode> &bvh2_nodes, uint32_t node_idx) {
495 if (node_idx >= bvh2_nodes.size()) {
496 return nullptr;
497 }
498
499 const BVHNode &flat_node = bvh2_nodes[node_idx];
500 BuildNode *node = new BuildNode();
501 allocated_nodes.push_back(node);
502
503 // Copy AABB
504 node->bounds.min = helios::make_vec3(flat_node.aabb_min[0], flat_node.aabb_min[1], flat_node.aabb_min[2]);
505 node->bounds.max = helios::make_vec3(flat_node.aabb_max[0], flat_node.aabb_max[1], flat_node.aabb_max[2]);
506
507 if (flat_node.right_child == UINT32_MAX) {
508 // Leaf node
509 node->prim_count = flat_node.prim_count;
510 node->prim_type = flat_node.prim_type;
511 node->first_prim_offset = flat_node.first_prim;
512 node->left = nullptr;
513 node->right = nullptr;
514 } else {
515 // Internal node
516 node->prim_count = 0;
517 node->split_axis = flat_node.split_axis;
518 node->left = reconstructTree(bvh2_nodes, flat_node.left_child);
519 node->right = reconstructTree(bvh2_nodes, flat_node.right_child);
520 }
521
522 return node;
523 }
524
525 BVHBuilder::BVH8Node *BVHBuilder::collapseToBVH8(BuildNode *bvh2_node, int depth) {
526 if (!bvh2_node)
527 return nullptr;
528
529 // Helper lambda: Count total primitives in a subtree
530 auto countPrimitivesInSubtree = [](BuildNode *node, auto &self) -> uint32_t {
531 if (!node)
532 return 0;
533 if (node->prim_count > 0)
534 return node->prim_count;
535 return self(node->left, self) + self(node->right, self);
536 };
537 BVH8Node *node8 = new BVH8Node();
538 allocated_bvh8_nodes.push_back(node8);
539
540 node8->aabb = bvh2_node->bounds;
541
542 // If already a leaf, create a BVH8 leaf with one child
543 if (bvh2_node->prim_count > 0) {
544 node8->children[0] = bvh2_node;
545 node8->is_leaf[0] = true;
546 node8->first_prim[0] = bvh2_node->first_prim_offset;
547 node8->prim_count[0] = bvh2_node->prim_count;
548 node8->prim_type[0] = bvh2_node->prim_type;
549 return node8;
550 }
551
552 // Greedy collapse: collect up to 8 BuildNode children from BVH2 subtree
553 std::vector<BuildNode *> candidates = {bvh2_node};
554
555 while (candidates.size() < 8) {
556 // Find node with most primitives to expand (better load balancing than surface area)
557 int best_idx = -1;
558 uint32_t max_prim_count = 0;
559
560 for (size_t i = 0; i < candidates.size(); i++) {
561 if (candidates[i]->prim_count > 0)
562 continue; // Skip leaves
563
564 // Count total primitives in this subtree (recursive count)
565 uint32_t subtree_prims = countPrimitivesInSubtree(candidates[i], countPrimitivesInSubtree);
566
567 if (subtree_prims > max_prim_count) {
568 max_prim_count = subtree_prims;
569 best_idx = static_cast<int>(i);
570 }
571 }
572
573 if (best_idx == -1)
574 break; // All leaves
575
576 // Expand the node with most primitives
577 BuildNode *expand = candidates[best_idx];
578 candidates.erase(candidates.begin() + best_idx);
579 if (expand->left)
580 candidates.push_back(expand->left);
581 if (expand->right)
582 candidates.push_back(expand->right);
583 }
584
585 // Assign BuildNode children to octants
586 assignChildrenToOctants(node8, candidates);
587
588 return node8;
589 }
590
591 void BVHBuilder::assignChildrenToOctants(BVH8Node *node8, std::vector<BuildNode *> &children) {
592 helios::vec3 center = node8->aabb.centroid();
593
594 // 8 octant directions (Morton order) - bit 1 = positive direction
595 static const helios::vec3 octants[8] = {
596 {1, 1, 1}, // 000
597 {-1, 1, 1}, // 001 (x negative)
598 {1, -1, 1}, // 010 (y negative)
599 {-1, -1, 1}, // 011 (x,y negative)
600 {1, 1, -1}, // 100 (z negative)
601 {-1, 1, -1}, // 101 (x,z negative)
602 {1, -1, -1}, // 110 (y,z negative)
603 {-1, -1, -1} // 111 (all negative)
604 };
605
606 // Greedy assignment: minimize distance to octant direction
607 for (int slot = 0; slot < 8; slot++) {
608 if (children.empty())
609 break;
610
611 int best = -1;
612 float best_score = 1e30f;
613
614 for (size_t c = 0; c < children.size(); c++) {
615 helios::vec3 child_center = children[c]->bounds.centroid();
616 helios::vec3 offset = child_center - center;
617
618 // Negative dot product = child is in this octant's direction
619 float score = -(offset.x * octants[slot].x + offset.y * octants[slot].y + offset.z * octants[slot].z);
620
621 if (score < best_score) {
622 best_score = score;
623 best = static_cast<int>(c);
624 }
625 }
626
627 if (best >= 0) {
628 BuildNode *child = children[best];
629 node8->children[slot] = child;
630 node8->is_leaf[slot] = (child->prim_count > 0);
631
632 if (node8->is_leaf[slot]) {
633 node8->first_prim[slot] = child->first_prim_offset;
634 node8->prim_count[slot] = child->prim_count;
635 node8->prim_type[slot] = child->prim_type;
636 }
637
638 children.erase(children.begin() + best);
639 }
640 }
641 }
642
643 void BVHBuilder::compressNode(const BVH8Node *src, CWBVH_Node &dst) {
644 // Compute quantization parameters for each axis
645 float extent_x = src->aabb.max.x - src->aabb.min.x;
646 float extent_y = src->aabb.max.y - src->aabb.min.y;
647 float extent_z = src->aabb.max.z - src->aabb.min.z;
648
649 dst.p[0] = src->aabb.min.x;
650 dst.p[1] = src->aabb.min.y;
651 dst.p[2] = src->aabb.min.z;
652
653 // Compute exponents: ceil(log2(extent / 255))
654 int exp_x = (extent_x > 1e-6f) ? static_cast<int>(std::ceil(std::log2(extent_x / 255.0f))) : 0;
655 int exp_y = (extent_y > 1e-6f) ? static_cast<int>(std::ceil(std::log2(extent_y / 255.0f))) : 0;
656 int exp_z = (extent_z > 1e-6f) ? static_cast<int>(std::ceil(std::log2(extent_z / 255.0f))) : 0;
657
658 // Store IEEE754-biased exponents so the GPU can decode via uintBitsToFloat(byte << 23)
659 uint8_t biased_x = static_cast<uint8_t>(std::max(0, std::min(255, 127 + exp_x)));
660 uint8_t biased_y = static_cast<uint8_t>(std::max(0, std::min(255, 127 + exp_y)));
661 uint8_t biased_z = static_cast<uint8_t>(std::max(0, std::min(255, 127 + exp_z)));
662 dst.e_packed = biased_x | (biased_y << 8) | (biased_z << 16);
663 dst.imask = 0;
664
665 float scale_x = std::pow(2.0f, static_cast<float>(exp_x));
666 float scale_y = std::pow(2.0f, static_cast<float>(exp_y));
667 float scale_z = std::pow(2.0f, static_cast<float>(exp_z));
668
669 // Quantize each child's AABB
670 for (int child = 0; child < 8; child++) {
671 if (!src->children[child]) {
672 // Empty slot - set inverted bounds so AABB test fails
673 dst.qmin_x[child] = 255;
674 dst.qmin_y[child] = 255;
675 dst.qmin_z[child] = 255;
676 dst.qmax_x[child] = 0;
677 dst.qmax_y[child] = 0;
678 dst.qmax_z[child] = 0;
679 continue;
680 }
681
682 BuildNode *child_node = src->children[child];
683
684 // Quantize X axis
685 float min_qx = (child_node->bounds.min.x - dst.p[0]) / scale_x;
686 float max_qx = (child_node->bounds.max.x - dst.p[0]) / scale_x;
687 int qmin_x = std::max(0, std::min(255, static_cast<int>(std::floor(min_qx - 0.001f))));
688 int qmax_x = std::max(0, std::min(255, static_cast<int>(std::ceil(max_qx + 0.001f))));
689 dst.qmin_x[child] = static_cast<uint8_t>(qmin_x);
690 dst.qmax_x[child] = static_cast<uint8_t>(qmax_x);
691
692 // Quantize Y axis
693 float min_qy = (child_node->bounds.min.y - dst.p[1]) / scale_y;
694 float max_qy = (child_node->bounds.max.y - dst.p[1]) / scale_y;
695 int qmin_y = std::max(0, std::min(255, static_cast<int>(std::floor(min_qy - 0.001f))));
696 int qmax_y = std::max(0, std::min(255, static_cast<int>(std::ceil(max_qy + 0.001f))));
697 dst.qmin_y[child] = static_cast<uint8_t>(qmin_y);
698 dst.qmax_y[child] = static_cast<uint8_t>(qmax_y);
699
700 // Quantize Z axis
701 float min_qz = (child_node->bounds.min.z - dst.p[2]) / scale_z;
702 float max_qz = (child_node->bounds.max.z - dst.p[2]) / scale_z;
703 int qmin_z = std::max(0, std::min(255, static_cast<int>(std::floor(min_qz - 0.001f))));
704 int qmax_z = std::max(0, std::min(255, static_cast<int>(std::ceil(max_qz + 0.001f))));
705 dst.qmin_z[child] = static_cast<uint8_t>(qmin_z);
706 dst.qmax_z[child] = static_cast<uint8_t>(qmax_z);
707
708 // Set imask bit if child is internal (not a leaf)
709 if (!src->is_leaf[child]) {
710 dst.imask |= (1 << child);
711 }
712
713 // Store per-child leaf data in extended fields
714 dst.child_first_prim[child] = src->first_prim[child];
715 dst.child_prim_count[child] = static_cast<uint8_t>(src->prim_count[child]);
716 dst.child_prim_type[child] = static_cast<uint8_t>(src->prim_type[child]);
717 }
718 }
719
720 uint32_t BVHBuilder::flattenBVH8(BVH8Node *root, std::vector<CWBVH_Node> &cwbvh_nodes) {
721 // BFS flattening ensures internal children of each parent occupy consecutive
722 // array slots, which is required for the GPU popcount-based child indexing:
723 // child_index = base_index_child + bitCount(imask & ((1 << slot) - 1))
724
725 struct QueueEntry {
726 BVH8Node *node;
727 uint32_t array_index;
728 };
729 std::queue<QueueEntry> bfs_queue;
730
731 // Place root
732 CWBVH_Node root_flat = {};
733 compressNode(root, root_flat);
734 cwbvh_nodes.push_back(root_flat);
735 bfs_queue.push({root, 0});
736
737 while (!bfs_queue.empty()) {
738 auto [current, my_idx] = bfs_queue.front();
739 bfs_queue.pop();
740
741 // Collect internal children (need recursive collapse to BVH8)
742 std::vector<std::pair<int, BVH8Node *>> internal_children;
743 for (int i = 0; i < 8; i++) {
744 if (current->children[i] && !current->is_leaf[i]) {
745 BVH8Node *child8 = collapseToBVH8(current->children[i]);
746 internal_children.push_back({i, child8});
747 }
748 }
749 if (internal_children.empty())
750 continue;
751
752 // Children placed contiguously starting at current array end
753 uint32_t child_base = static_cast<uint32_t>(cwbvh_nodes.size());
754 cwbvh_nodes[my_idx].base_index_child = child_base;
755
756 for (auto &[slot, child8]: internal_children) {
757 CWBVH_Node child_flat = {};
758 compressNode(child8, child_flat);
759 uint32_t child_idx = static_cast<uint32_t>(cwbvh_nodes.size());
760 cwbvh_nodes.push_back(child_flat);
761 bfs_queue.push({child8, child_idx});
762 }
763 }
764
765 return 0; // Root is always at index 0
766 }
767
768} // namespace helios