1.3.49
 
Loading...
Searching...
No Matches
CollisionDetection.cpp
Go to the documentation of this file.
1
16#include "CollisionDetection.h"
17#include <atomic>
18#include <functional>
19#include <limits>
20#include <queue>
21
22#ifdef _OPENMP
23#include <omp.h>
24#endif
25
26// SIMD headers
27#ifdef __AVX2__
28#include <immintrin.h>
29#elif defined(__SSE4_1__)
30#include <smmintrin.h>
31#elif defined(__SSE2__)
32#include <emmintrin.h>
33#endif
34
35#ifdef HELIOS_CUDA_AVAILABLE
36#include <cuda_runtime.h>
37#endif
38
39using namespace helios;
40
41#ifdef HELIOS_CUDA_AVAILABLE
42// GPU BVH node structure (must match the one in .cu file)
43struct GPUBVHNode {
44 float3 aabb_min, aabb_max;
45 unsigned int left_child, right_child;
46 unsigned int primitive_start, primitive_count;
47 unsigned int is_leaf, padding;
48};
49
50// External CUDA functions
51extern "C" {
52void launchBVHTraversal(void *h_nodes, int node_count, unsigned int *h_primitive_indices, int primitive_count, float *h_primitive_aabb_min, float *h_primitive_aabb_max, float *h_query_aabb_min, float *h_query_aabb_max, int num_queries,
53 unsigned int *h_results, unsigned int *h_result_counts, int max_results_per_query);
54void launchVoxelRayPathLengths(int num_rays, float *h_ray_origins, float *h_ray_directions, float grid_center_x, float grid_center_y, float grid_center_z, float grid_size_x, float grid_size_y, float grid_size_z, int grid_divisions_x,
55 int grid_divisions_y, int grid_divisions_z, int primitive_count, int *h_voxel_ray_counts, float *h_voxel_path_lengths, int *h_voxel_transmitted, int *h_voxel_hit_before, int *h_voxel_hit_after, int *h_voxel_hit_inside);
56// Warp-efficient GPU kernels
57void launchWarpEfficientBVH(void *h_bvh_soa_gpu, unsigned int *h_primitive_indices, int primitive_count, float *h_primitive_aabb_min, float *h_primitive_aabb_max, float *h_ray_origins, float *h_ray_directions, float *h_ray_max_distances,
58 int num_rays, unsigned int *h_results, unsigned int *h_result_counts, int max_results_per_ray);
59}
60
61// Helper function to convert helios::vec3 to float3
62inline float3 heliosVecToFloat3(const helios::vec3 &v) {
63 return make_float3(v.x, v.y, v.z);
64}
65#endif
66
68
69 if (a_context == nullptr) {
70 helios_runtime_error("ERROR (CollisionDetection::CollisionDetection): Context is null");
71 }
72
73 context = a_context;
74 printmessages = true;
75
76#ifdef HELIOS_CUDA_AVAILABLE
77 gpu_acceleration_enabled = true;
78#else
79 gpu_acceleration_enabled = false;
80#endif
81
82 // Initialize GPU memory pointers
83 d_bvh_nodes = nullptr;
84 d_primitive_indices = nullptr;
85 gpu_memory_allocated = false;
86
87 // Initialize BVH caching variables
88 bvh_dirty = true;
89 soa_dirty = true; // SoA needs initial build
90 automatic_bvh_rebuilds = true; // Default: allow automatic rebuilds
91
92 // Initialize hierarchical BVH variables
93 hierarchical_bvh_enabled = false;
94 static_bvh_valid = false;
95
96 // Initialize tree-based BVH variables
97 tree_based_bvh_enabled = false;
98 tree_isolation_distance = 5.0f; // Default 5 meter isolation distance
99 obstacle_spatial_grid_initialized = false;
100
101 // Initialize grid parameters
102 grid_center = make_vec3(0, 0, 0);
103 grid_size = make_vec3(1, 1, 1);
104 grid_divisions = helios::make_int3(1, 1, 1);
105
106 // Initialize voxel data parameters
107 voxel_data_initialized = false;
108 voxel_grid_center = make_vec3(0, 0, 0);
109 voxel_grid_size = make_vec3(1, 1, 1);
110 voxel_grid_divisions = helios::make_int3(1, 1, 1);
111 use_flat_arrays = false; // Default to false, enabled during initialization
112
113 // Initialize spatial optimization parameters
114 max_collision_distance = 10.0f; // Default 10 meter maximum distance
115}
116
118#ifdef HELIOS_CUDA_AVAILABLE
119 freeGPUMemory();
120#endif
121}
122
123std::vector<uint> CollisionDetection::findCollisions(uint UUID, bool allow_spatial_culling) {
124 return findCollisions(std::vector<uint>{UUID}, allow_spatial_culling);
125}
126
127std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &UUIDs, bool allow_spatial_culling) {
128
129 if (UUIDs.empty()) {
130 if (printmessages) {
131 std::cerr << "WARNING (CollisionDetection::findCollisions): No UUIDs provided" << std::endl;
132 }
133 return {};
134 }
135
136 // Validate UUIDs - throw exception if any are invalid
137 std::vector<uint> valid_UUIDs;
138 for (uint uuid: UUIDs) {
139 if (context->doesPrimitiveExist(uuid)) {
140 valid_UUIDs.push_back(uuid);
141 } else {
142 if (printmessages) {
143 std::cerr << "ERROR (CollisionDetection::findCollisions): Invalid UUID " << uuid << std::endl;
144 }
145 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Invalid UUID " + std::to_string(uuid) + " provided");
146 }
147 }
148
149 // Automatically rebuild BVH if geometry has changed or BVH is empty
150 ensureBVHCurrent();
151
152 std::vector<uint> all_collisions;
153
154 for (uint UUID: valid_UUIDs) {
155
156 // Get bounding box for query primitive
157 if (!context->doesPrimitiveExist(UUID)) {
158 continue; // Skip invalid primitive
159 }
160 vec3 aabb_min, aabb_max;
161 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
162
163 std::vector<uint> collisions;
164
165#ifdef HELIOS_CUDA_AVAILABLE
166 if (gpu_acceleration_enabled && gpu_memory_allocated) {
167 collisions = traverseBVH_GPU(aabb_min, aabb_max);
168 } else {
169 collisions = traverseBVH_CPU(aabb_min, aabb_max);
170 }
171#else
172 collisions = traverseBVH_CPU(aabb_min, aabb_max);
173#endif
174
175 // Remove the query UUID from results
176 collisions.erase(std::remove(collisions.begin(), collisions.end(), UUID), collisions.end());
177
178 // Add to overall results
179 all_collisions.insert(all_collisions.end(), collisions.begin(), collisions.end());
180 }
181
182 // Remove duplicates
183 std::sort(all_collisions.begin(), all_collisions.end());
184 all_collisions.erase(std::unique(all_collisions.begin(), all_collisions.end()), all_collisions.end());
185
186 return all_collisions;
187}
188
189std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &primitive_UUIDs, const std::vector<uint> &object_IDs, bool allow_spatial_culling) {
190
191 if (primitive_UUIDs.empty() && object_IDs.empty()) {
192 if (printmessages) {
193 std::cerr << "WARNING (CollisionDetection::findCollisions): No UUIDs or object IDs provided" << std::endl;
194 }
195 return {};
196 }
197
198 // Expand object IDs to their constituent primitive UUIDs
199 std::vector<uint> all_test_UUIDs = primitive_UUIDs;
200
201 for (uint ObjID: object_IDs) {
202 if (!context->doesObjectExist(ObjID)) {
203 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Object ID " + std::to_string(ObjID) + " does not exist");
204 }
205
206 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
207 all_test_UUIDs.insert(all_test_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
208 }
209
210 return findCollisions(all_test_UUIDs, allow_spatial_culling);
211}
212
213std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &query_UUIDs, const std::vector<uint> &query_object_IDs, const std::vector<uint> &target_UUIDs, const std::vector<uint> &target_object_IDs, bool allow_spatial_culling) {
214
215 if (query_UUIDs.empty() && query_object_IDs.empty()) {
216 if (printmessages) {
217 std::cerr << "WARNING (CollisionDetection::findCollisions): No query UUIDs or object IDs provided" << std::endl;
218 }
219 return {};
220 }
221
222 // Expand query objects to their constituent primitive UUIDs
223 std::vector<uint> all_query_UUIDs = query_UUIDs;
224
225 for (uint ObjID: query_object_IDs) {
226 if (!context->doesObjectExist(ObjID)) {
227 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Query object ID " + std::to_string(ObjID) + " does not exist");
228 }
229
230 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
231 all_query_UUIDs.insert(all_query_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
232 }
233
234 // Validate query UUIDs
235 if (!validateUUIDs(all_query_UUIDs)) {
236 helios_runtime_error("ERROR (CollisionDetection::findCollisions): One or more invalid query UUIDs provided");
237 }
238
239 // Build restricted BVH if target geometry is specified
240
241 if (target_UUIDs.empty() && target_object_IDs.empty()) {
242 // Use all geometry in context as target
243 ensureBVHCurrent();
244 } else {
245 // Build restricted BVH with only target geometry
246 std::vector<uint> all_target_UUIDs = target_UUIDs;
247
248 for (uint ObjID: target_object_IDs) {
249 if (!context->doesObjectExist(ObjID)) {
250 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Target object ID " + std::to_string(ObjID) + " does not exist");
251 }
252
253 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
254 all_target_UUIDs.insert(all_target_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
255 }
256
257 // OPTIMIZATION: Use per-tree BVH if enabled for better scaling
258 if (tree_based_bvh_enabled && allow_spatial_culling && !all_query_UUIDs.empty()) {
259 // Get tree-relevant geometry instead of using all targets
260 helios::vec3 query_center = helios::vec3(0, 0, 0);
261 if (!all_query_UUIDs.empty()) {
262 // Calculate approximate query center from first query primitive
263 helios::vec3 min_corner, max_corner;
264 context->getPrimitiveBoundingBox(all_query_UUIDs[0], min_corner, max_corner);
265 query_center = (min_corner + max_corner) * 0.5f;
266 }
267
268 // Use the configured tree isolation distance (collision cone height)
269 std::vector<uint> effective_targets = getRelevantGeometryForTree(query_center, all_query_UUIDs, tree_isolation_distance);
270
271 all_target_UUIDs = effective_targets;
272 }
273
274 // Validate target UUIDs
275 if (!all_target_UUIDs.empty() && !validateUUIDs(all_target_UUIDs)) {
276 helios_runtime_error("ERROR (CollisionDetection::findCollisions): One or more invalid target UUIDs provided");
277 }
278
279 // Build BVH with only the target geometry (with caching)
280 if (!all_target_UUIDs.empty()) {
281 updateBVH(all_target_UUIDs, false); // Use caching logic instead of direct rebuild
282 }
283 }
284
285 // Perform collision detection using the same logic as the standard findCollisions
286 std::vector<uint> all_collisions;
287
288 for (uint UUID: all_query_UUIDs) {
289
290 // Get bounding box for query primitive
291 if (!context->doesPrimitiveExist(UUID)) {
292 continue; // Skip invalid primitive
293 }
294 vec3 aabb_min, aabb_max;
295 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
296
297 std::vector<uint> collisions;
298
299#ifdef HELIOS_CUDA_AVAILABLE
300 if (gpu_acceleration_enabled && gpu_memory_allocated) {
301 collisions = traverseBVH_GPU(aabb_min, aabb_max);
302 } else {
303 collisions = traverseBVH_CPU(aabb_min, aabb_max);
304 }
305#else
306 collisions = traverseBVH_CPU(aabb_min, aabb_max);
307#endif
308
309 // Remove the query UUID from results
310 collisions.erase(std::remove(collisions.begin(), collisions.end(), UUID), collisions.end());
311
312 // Add to overall results
313 all_collisions.insert(all_collisions.end(), collisions.begin(), collisions.end());
314 }
315
316 // Remove duplicates
317 std::sort(all_collisions.begin(), all_collisions.end());
318 all_collisions.erase(std::unique(all_collisions.begin(), all_collisions.end()), all_collisions.end());
319
320 return all_collisions;
321}
322
323void CollisionDetection::buildBVH(const std::vector<uint> &UUIDs) {
324
325 std::vector<uint> primitives_to_include;
326
327 if (UUIDs.empty()) {
328 // Include all primitives in context
329 primitives_to_include = context->getAllUUIDs();
330 } else {
331 primitives_to_include = UUIDs;
332 }
333
334
335 if (primitives_to_include.empty()) {
336 if (printmessages) {
337 std::cerr << "WARNING (CollisionDetection::buildBVH): No primitives found to build BVH" << std::endl;
338 }
339 return;
340 }
341
342 // Validate UUIDs - throw exception if any are invalid (only when specific UUIDs are provided)
343 std::vector<uint> valid_primitives;
344 if (!UUIDs.empty()) {
345 // When specific UUIDs are provided, they must all be valid
346 for (uint uuid: primitives_to_include) {
347 if (context->doesPrimitiveExist(uuid)) {
348 valid_primitives.push_back(uuid);
349 } else {
350 if (printmessages) {
351 std::cerr << "ERROR (CollisionDetection::buildBVH): Invalid UUID " << uuid << std::endl;
352 }
353 helios_runtime_error("ERROR (CollisionDetection::buildBVH): Invalid UUID " + std::to_string(uuid) + " provided");
354 }
355 }
356 } else {
357 // When no specific UUIDs provided (use all), filter out invalid ones
358 for (uint uuid: primitives_to_include) {
359 if (context->doesPrimitiveExist(uuid)) {
360 valid_primitives.push_back(uuid);
361 } else if (printmessages) {
362 std::cerr << "WARNING (CollisionDetection::buildBVH): Skipping invalid UUID " << uuid << std::endl;
363 }
364 }
365
366 if (valid_primitives.empty()) {
367 if (printmessages) {
368 std::cerr << "WARNING (CollisionDetection::buildBVH): No valid primitives found after filtering" << std::endl;
369 }
370 return;
371 }
372 }
373
374 primitives_to_include = valid_primitives;
375
376 // Check if the primitive set has actually changed before clearing cache
377 std::set<uint> new_primitive_set(primitives_to_include.begin(), primitives_to_include.end());
378 std::set<uint> old_primitive_set(primitive_indices.begin(), primitive_indices.end());
379
380 bool primitive_set_changed = (new_primitive_set != old_primitive_set);
381
382 if (primitive_set_changed) {
383 // Clear primitive cache only when primitive set changes - CRITICAL for performance
384 primitive_cache.clear();
385 }
386
387 // Clear existing BVH
388 bvh_nodes.clear();
389 primitive_indices.clear();
390
391 // Copy primitives to indices array
392 primitive_indices = primitives_to_include;
393
394 // Pre-allocate BVH nodes to avoid excessive resizing
395 // For N primitives, we need at most 2*N-1 nodes for a complete binary tree
396 size_t max_nodes = std::max(size_t(1), 2 * primitives_to_include.size());
397 bvh_nodes.clear();
398 bvh_nodes.resize(max_nodes); // Pre-allocate ALL nodes at once to avoid any resizing
399 next_available_node_index = 1; // Track next available node (0 is root)
400
401 // OPTIMIZATION: Pre-cache bounding boxes with dirty flagging to avoid repeated expensive calculations
402 // Only clear cache for primitives that no longer exist or are dirty
403 std::unordered_set<uint> current_primitives(primitives_to_include.begin(), primitives_to_include.end());
404
405 // Remove cached entries for primitives that no longer exist
406 auto cache_it = primitive_aabbs_cache.begin();
407 while (cache_it != primitive_aabbs_cache.end()) {
408 if (current_primitives.find(cache_it->first) == current_primitives.end()) {
409 cache_it = primitive_aabbs_cache.erase(cache_it);
410 } else {
411 ++cache_it;
412 }
413 }
414
415 // Update only dirty or missing cache entries
416 for (uint UUID: primitives_to_include) {
417 if (!context->doesPrimitiveExist(UUID)) {
418 continue; // Skip invalid primitive
419 }
420
421 // Only update if not cached or marked as dirty
422 bool needs_update = (primitive_aabbs_cache.find(UUID) == primitive_aabbs_cache.end()) || (dirty_primitive_cache.find(UUID) != dirty_primitive_cache.end());
423
424 if (needs_update) {
425 vec3 aabb_min, aabb_max;
426 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
427 primitive_aabbs_cache[UUID] = {aabb_min, aabb_max};
428 dirty_primitive_cache.erase(UUID); // Mark as clean
429 }
430 }
431
432 // Build BVH recursively starting from root
433 buildBVHRecursive(0, 0, primitive_indices.size(), 0);
434
435 // Resize to actual nodes used (much more efficient than shrink_to_fit)
436 bvh_nodes.resize(next_available_node_index);
437
438 // Clear any stale optimized structures since we just rebuilt the BVH
439 // They will be rebuilt on demand by ensureOptimizedBVH() when needed
440 bvh_nodes_soa.clear();
441 bvh_nodes_soa.node_count = 0;
442
443 // Transfer to GPU if acceleration is enabled
444#ifdef HELIOS_CUDA_AVAILABLE
445 if (gpu_acceleration_enabled) {
446 transferBVHToGPU();
447 }
448#endif
449
450 // Update internal tracking - record what we have processed
451 // This allows us to avoid redundant rebuilds until user marks geometry clean
452 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
453
454 // Track ALL primitives that are now in the BVH, not just dirty ones
455 // This ensures isBVHValid() works correctly when new geometry is added
456 last_processed_uuids.clear();
457 last_processed_uuids.insert(primitives_to_include.begin(), primitives_to_include.end());
458
459 last_processed_deleted_uuids.clear();
460 last_processed_deleted_uuids.insert(context_deleted_uuids.begin(), context_deleted_uuids.end());
461
462 // Update BVH geometry tracking for caching
463 last_bvh_geometry.clear();
464 last_bvh_geometry.insert(primitives_to_include.begin(), primitives_to_include.end());
465 bvh_dirty = false;
466 soa_dirty = true; // SoA needs rebuild after BVH change
467}
468
470 markBVHDirty();
471 buildBVH();
472}
473
475 automatic_bvh_rebuilds = false;
476}
477
479 automatic_bvh_rebuilds = true;
480}
481
483 hierarchical_bvh_enabled = true;
484 static_bvh_valid = false; // Force rebuild of static BVH
485}
486
488 hierarchical_bvh_enabled = false;
489 // Clear static BVH data
490 static_bvh_nodes.clear();
491 static_bvh_primitives.clear();
492 static_bvh_valid = false;
493 last_static_bvh_geometry.clear();
494}
495
496void CollisionDetection::updateHierarchicalBVH(const std::set<uint> &requested_geometry, bool force_rebuild) {
497
498 // Step 1: Build/update static BVH if needed
499 if (!static_bvh_valid || force_rebuild || static_geometry_cache != last_static_bvh_geometry) {
501 }
502
503 // Step 2: Separate dynamic geometry (not in static cache)
504 std::vector<uint> dynamic_geometry;
505 for (uint uuid: requested_geometry) {
506 if (static_geometry_cache.find(uuid) == static_geometry_cache.end()) {
507 dynamic_geometry.push_back(uuid);
508 }
509 }
510
511
512 // Step 3: Build dynamic BVH with remaining geometry (this is smaller and faster)
513 if (!dynamic_geometry.empty()) {
514 buildBVH(dynamic_geometry); // Use existing buildBVH for dynamic part
515 } else {
516 // No dynamic geometry - just clear the dynamic BVH
517 bvh_nodes.clear();
518 primitive_indices.clear();
519 }
520
521 // Update cache
522 last_bvh_geometry = requested_geometry;
523 bvh_dirty = false;
524 soa_dirty = true; // SoA needs rebuild after BVH change
525}
526
528 if (static_geometry_cache.empty()) {
529 static_bvh_nodes.clear();
530 static_bvh_primitives.clear();
531 static_bvh_valid = false;
532 return;
533 }
534
535 std::vector<uint> static_primitives(static_geometry_cache.begin(), static_geometry_cache.end());
536
537
538 // Build BVH for static geometry (reuse existing GPU BVH building logic)
539 // For now, we'll store it in the static BVH structures but use same building method
540 std::vector<BVHNode> temp_nodes;
541 std::vector<uint> temp_primitives;
542
543 // Swap in static storage for building
544 std::swap(bvh_nodes, temp_nodes);
545 std::swap(primitive_indices, temp_primitives);
546
547 // Build BVH using existing method
548 buildBVH(static_primitives);
549
550 // Store result in static BVH and restore dynamic BVH
551 static_bvh_nodes = bvh_nodes;
552 static_bvh_primitives = primitive_indices;
553 std::swap(bvh_nodes, temp_nodes);
554 std::swap(primitive_indices, temp_primitives);
555
556 static_bvh_valid = true;
557 last_static_bvh_geometry = static_geometry_cache;
558}
559
560void CollisionDetection::updateBVH(const std::vector<uint> &UUIDs, bool force_rebuild) {
561 // Convert input to set for efficient comparison
562 std::set<uint> requested_geometry(UUIDs.begin(), UUIDs.end());
563
564 // Check if geometry has changed significantly
565 bool geometry_changed = (requested_geometry != last_bvh_geometry) || bvh_dirty;
566
567 if (!geometry_changed && !force_rebuild) {
568 return;
569 }
570
571 // Use hierarchical BVH approach if enabled
572 if (hierarchical_bvh_enabled) {
573 updateHierarchicalBVH(requested_geometry, force_rebuild);
574 return;
575 }
576
577 // Determine if we need a full rebuild or can do incremental update
578 if (force_rebuild || bvh_nodes.empty()) {
579 // Full rebuild required
580 buildBVH(UUIDs);
581 } else {
582 // Check how much geometry has changed
583 std::set<uint> added_geometry, removed_geometry;
584
585 // Find added geometry (in requested but not in last_bvh_geometry)
586 std::set_difference(requested_geometry.begin(), requested_geometry.end(), last_bvh_geometry.begin(), last_bvh_geometry.end(), std::inserter(added_geometry, added_geometry.begin()));
587
588 // Find removed geometry (in last_bvh_geometry but not in requested)
589 std::set_difference(last_bvh_geometry.begin(), last_bvh_geometry.end(), requested_geometry.begin(), requested_geometry.end(), std::inserter(removed_geometry, removed_geometry.begin()));
590
591 // If more than 20% of geometry changed, do full rebuild, otherwise incremental
592 size_t total_change = added_geometry.size() + removed_geometry.size();
593 size_t current_size = std::max(last_bvh_geometry.size(), requested_geometry.size());
594
595 // For plant growth, we want to favor incremental updates since they're mostly additions
596 // Use a more aggressive threshold that considers the type of changes
597 bool mostly_additions = (removed_geometry.size() < added_geometry.size() * 0.1f); // <10% removals
598 float change_threshold = mostly_additions ? 0.5f : 0.2f; // Higher threshold for growth scenarios
599
600 if (current_size == 0 || (float(total_change) / float(current_size)) > change_threshold) {
601 buildBVH(UUIDs);
602 } else {
603 // Implement incremental update by selective insertion/removal
604 incrementalUpdateBVH(added_geometry, removed_geometry, requested_geometry);
605 }
606 }
607
608 // Update tracking
609 last_bvh_geometry = requested_geometry;
610 bvh_dirty = false;
611 soa_dirty = true; // SoA needs rebuild after BVH change
612}
613
614void CollisionDetection::setStaticGeometry(const std::vector<uint> &UUIDs) {
615 static_geometry_cache.clear();
616 static_geometry_cache.insert(UUIDs.begin(), UUIDs.end());
617}
618
619void CollisionDetection::ensureBVHCurrent() {
620 // If automatic rebuilds are disabled, skip all automatic updates
621 if (!automatic_bvh_rebuilds) {
622 return;
623 }
624
625 // If BVH is completely empty, build it with all geometry
626 if (bvh_nodes.empty()) {
627 if (printmessages) {
628 std::cout << "Building initial BVH..." << std::endl;
629 }
630 buildBVH();
631 return;
632 }
633
634 // Use two-level dirty tracking pattern (similar to Visualizer plugin)
635 // Get dirty UUIDs from Context (but don't clear them - that's for the user)
636 std::vector<uint> context_dirty_uuids = context->getDirtyUUIDs(false); // Don't include deleted
637 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
638
639 // Convert to sets for efficient comparison
640 std::set<uint> current_dirty(context_dirty_uuids.begin(), context_dirty_uuids.end());
641 std::set<uint> current_deleted(context_deleted_uuids.begin(), context_deleted_uuids.end());
642
643 // Check if there are new changes since our last processing
644 bool has_new_dirty = false;
645 bool has_new_deleted = false;
646
647 // Check for new dirty UUIDs we haven't processed
648 for (uint uuid: current_dirty) {
649 if (last_processed_uuids.find(uuid) == last_processed_uuids.end()) {
650 has_new_dirty = true;
651 break;
652 }
653 }
654
655 // Check for new deleted UUIDs we haven't processed
656 for (uint uuid: current_deleted) {
657 if (last_processed_deleted_uuids.find(uuid) == last_processed_deleted_uuids.end()) {
658 has_new_deleted = true;
659 break;
660 }
661 }
662
663 // Only rebuild if there are genuinely new changes since our last processing
664 if (has_new_dirty || has_new_deleted) {
665 if (printmessages) {
666 std::cout << "Geometry has changed since last BVH build, rebuilding..." << std::endl;
667 }
668
669 buildBVH(); // This will update our internal tracking
670 }
671
672 // Note: We do NOT call context->markGeometryClean() here
673 // That should only be done by the user after all plugins have processed the changes
674}
675
677 if (bvh_nodes.empty()) {
678 return false;
679 }
680
681 // Check if there are new primitives in the context that aren't in our BVH
682 std::vector<uint> all_context_uuids = context->getAllUUIDs();
683 for (uint uuid: all_context_uuids) {
684 if (last_processed_uuids.find(uuid) == last_processed_uuids.end()) {
685 return false; // Found a primitive that's not in our BVH
686 }
687 }
688
689 // Check for deleted UUIDs we haven't processed
690 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
691 for (uint uuid: context_deleted_uuids) {
692 if (last_processed_deleted_uuids.find(uuid) == last_processed_deleted_uuids.end()) {
693 return false; // Found new deleted UUID we haven't processed
694 }
695 }
696
697 // Check if any primitives in our BVH have been deleted
698 for (uint uuid: primitive_indices) {
699 if (!context->doesPrimitiveExist(uuid)) {
700 return false; // A primitive in our BVH no longer exists
701 }
702 }
703
704 return true; // BVH is current with respect to geometry changes
705}
706
708#ifdef HELIOS_CUDA_AVAILABLE
709 gpu_acceleration_enabled = true;
710 if (!bvh_nodes.empty()) {
711 transferBVHToGPU();
712 }
713#else
714 if (printmessages) {
715 std::cerr << "WARNING: GPU acceleration requested but CUDA not available. Ignoring request." << std::endl;
716 }
717#endif
718}
719
721 gpu_acceleration_enabled = false;
722#ifdef HELIOS_CUDA_AVAILABLE
723 freeGPUMemory();
724#endif
725}
726
728 return gpu_acceleration_enabled;
729}
730
732 printmessages = false;
733}
734
736 printmessages = true;
737}
738
740 return primitive_indices.size();
741}
742
743void CollisionDetection::getBVHStatistics(size_t &node_count, size_t &leaf_count, size_t &max_depth) const {
744
745 node_count = bvh_nodes.size();
746 leaf_count = 0;
747 max_depth = 0;
748
749 // Simple traversal to count leaves and find max depth
750 std::function<void(uint, size_t)> traverse = [&](uint node_idx, size_t depth) {
751 if (node_idx >= bvh_nodes.size())
752 return;
753
754 const BVHNode &node = bvh_nodes[node_idx];
755 max_depth = std::max(max_depth, depth);
756
757 if (node.is_leaf) {
758 leaf_count++;
759 } else {
760 if (node.left_child != 0xFFFFFFFF) {
761 traverse(node.left_child, depth + 1);
762 }
763 if (node.right_child != 0xFFFFFFFF) {
764 traverse(node.right_child, depth + 1);
765 }
766 }
767 };
768
769 if (!bvh_nodes.empty()) {
770 traverse(0, 0);
771 }
772}
773
774void CollisionDetection::calculateAABB(const std::vector<uint> &primitives, vec3 &aabb_min, vec3 &aabb_max) const {
775
776 if (primitives.empty()) {
777 aabb_min = make_vec3(0, 0, 0);
778 aabb_max = make_vec3(0, 0, 0);
779 return;
780 }
781
782 // Find first valid primitive for initialization
783 size_t first_valid = 0;
784 while (first_valid < primitives.size() && !context->doesPrimitiveExist(primitives[first_valid])) {
785 first_valid++;
786 }
787 if (first_valid >= primitives.size()) {
788 // No valid primitives found
789 aabb_min = make_vec3(0, 0, 0);
790 aabb_max = make_vec3(0, 0, 0);
791 return;
792 }
793
794 // Initialize with first valid primitive's bounding box
795 context->getPrimitiveBoundingBox(primitives[first_valid], aabb_min, aabb_max);
796
797 // Expand to include all remaining valid primitives
798 for (size_t i = first_valid + 1; i < primitives.size(); i++) {
799 if (!context->doesPrimitiveExist(primitives[i])) {
800 continue; // Skip invalid primitive
801 }
802 vec3 prim_min, prim_max;
803 context->getPrimitiveBoundingBox(primitives[i], prim_min, prim_max);
804
805 aabb_min.x = std::min(aabb_min.x, prim_min.x);
806 aabb_min.y = std::min(aabb_min.y, prim_min.y);
807 aabb_min.z = std::min(aabb_min.z, prim_min.z);
808
809 aabb_max.x = std::max(aabb_max.x, prim_max.x);
810 aabb_max.y = std::max(aabb_max.y, prim_max.y);
811 aabb_max.z = std::max(aabb_max.z, prim_max.z);
812 }
813}
814
815void CollisionDetection::buildBVHRecursive(uint node_index, size_t primitive_start, size_t primitive_count, int depth) {
816
817 // Node should already be pre-allocated
818 if (node_index >= bvh_nodes.size()) {
819 throw std::runtime_error("CollisionDetection: BVH recursive access exceeded pre-allocated capacity");
820 }
821
822 // Bounds check for primitive_indices access
823 if (primitive_start + primitive_count > primitive_indices.size()) {
824 throw std::runtime_error("CollisionDetection: BVH primitive bounds check failed - primitive_start(" + std::to_string(primitive_start) + ") + primitive_count(" + std::to_string(primitive_count) + ") > primitive_indices.size(" +
825 std::to_string(primitive_indices.size()) + ")");
826 }
827
828 BVHNode &node = bvh_nodes[node_index];
829
830 // Calculate bounding box for this node using cached AABBs
831 if (primitive_count == 0) {
832 node.aabb_min = make_vec3(0, 0, 0);
833 node.aabb_max = make_vec3(0, 0, 0);
834 } else {
835 // Initialize with first primitive's cached AABB
836 uint first_uuid = primitive_indices[primitive_start];
837 auto it = primitive_aabbs_cache.find(first_uuid);
838 if (it == primitive_aabbs_cache.end()) {
839 // Handle missing primitive - use zero bounds
840 node.aabb_min = make_vec3(0, 0, 0);
841 node.aabb_max = make_vec3(0, 0, 0);
842 return;
843 }
844 const auto &first_cached_aabb = it->second;
845 node.aabb_min = first_cached_aabb.first;
846 node.aabb_max = first_cached_aabb.second;
847
848 // Expand to include all primitives in this node
849 for (size_t i = 1; i < primitive_count; i++) {
850 uint uuid = primitive_indices[primitive_start + i];
851 auto it = primitive_aabbs_cache.find(uuid);
852 if (it == primitive_aabbs_cache.end()) {
853 continue; // Skip missing primitive
854 }
855 const auto &cached_aabb = it->second;
856 node.aabb_min.x = std::min(node.aabb_min.x, cached_aabb.first.x);
857 node.aabb_min.y = std::min(node.aabb_min.y, cached_aabb.first.y);
858 node.aabb_min.z = std::min(node.aabb_min.z, cached_aabb.first.z);
859
860 node.aabb_max.x = std::max(node.aabb_max.x, cached_aabb.second.x);
861 node.aabb_max.y = std::max(node.aabb_max.y, cached_aabb.second.y);
862 node.aabb_max.z = std::max(node.aabb_max.z, cached_aabb.second.z);
863 }
864 }
865
866 // Stopping criteria - make this a leaf
867 // TEMPORARY: Very aggressive stopping for large scenes to prevent timeout
868 const int MAX_PRIMITIVES_PER_LEAF = (primitive_indices.size() > 500000) ? 500 : 100; // Much larger leaves for big scenes
869 const int MAX_DEPTH = (primitive_indices.size() > 500000) ? 6 : 10; // Shallower trees for big scenes
870
871 if (primitive_count <= MAX_PRIMITIVES_PER_LEAF || depth >= MAX_DEPTH) {
872 // Make leaf node
873 node.is_leaf = true;
874 node.primitive_start = primitive_start;
875 node.primitive_count = primitive_count;
876 node.left_child = 0xFFFFFFFF;
877 node.right_child = 0xFFFFFFFF;
878 return;
879 }
880
881 // Find best split using Surface Area Heuristic (simplified)
882 vec3 extent = node.aabb_max - node.aabb_min;
883 int split_axis = 0; // Split along longest axis
884 if (extent.y > extent.x)
885 split_axis = 1;
886 if (extent.z > (split_axis == 0 ? extent.x : extent.y))
887 split_axis = 2;
888
889 // In-place sort using pre-cached centroids to avoid temporary vector allocations
890 // This is critical for memory efficiency during recursive BVH construction
891 std::sort(primitive_indices.begin() + primitive_start, primitive_indices.begin() + primitive_start + primitive_count, [&](uint a, uint b) {
892 // Use cache lookup with bounds checking for safety
893 auto it_a = primitive_aabbs_cache.find(a);
894 auto it_b = primitive_aabbs_cache.find(b);
895 if (it_a == primitive_aabbs_cache.end() || it_b == primitive_aabbs_cache.end()) {
896 return a < b; // Fallback to UUID ordering for missing primitives
897 }
898
899 const auto &aabb_a = it_a->second;
900 const auto &aabb_b = it_b->second;
901
902 // Compute centroids inline to avoid vec3 temporaries
903 float centroid_a_coord, centroid_b_coord;
904 if (split_axis == 0) {
905 centroid_a_coord = 0.5f * (aabb_a.first.x + aabb_a.second.x);
906 centroid_b_coord = 0.5f * (aabb_b.first.x + aabb_b.second.x);
907 } else if (split_axis == 1) {
908 centroid_a_coord = 0.5f * (aabb_a.first.y + aabb_a.second.y);
909 centroid_b_coord = 0.5f * (aabb_b.first.y + aabb_b.second.y);
910 } else {
911 centroid_a_coord = 0.5f * (aabb_a.first.z + aabb_a.second.z);
912 centroid_b_coord = 0.5f * (aabb_b.first.z + aabb_b.second.z);
913 }
914
915 return centroid_a_coord < centroid_b_coord;
916 });
917
918 // Split in middle
919 size_t split_index = primitive_count / 2;
920
921 // Allocate child nodes from pre-allocated array (no resizing needed)
922 uint left_child_index = next_available_node_index++;
923 uint right_child_index = next_available_node_index++;
924
925 // Ensure we don't exceed pre-allocated capacity
926 if (right_child_index >= bvh_nodes.size()) {
927 throw std::runtime_error("CollisionDetection: BVH node allocation exceeded pre-calculated capacity");
928 }
929
930 // Re-get the node reference after potential reallocation
931 BVHNode &updated_node = bvh_nodes[node_index];
932 updated_node.left_child = left_child_index;
933 updated_node.right_child = right_child_index;
934 updated_node.is_leaf = false;
935 updated_node.primitive_start = 0;
936 updated_node.primitive_count = 0;
937
938 // Recursively build child nodes
939 buildBVHRecursive(left_child_index, primitive_start, split_index, depth + 1);
940 buildBVHRecursive(right_child_index, primitive_start + split_index, primitive_count - split_index, depth + 1);
941}
942
943std::vector<uint> CollisionDetection::traverseBVH_CPU(const vec3 &query_aabb_min, const vec3 &query_aabb_max) {
944
945 std::vector<uint> results;
946
947 if (bvh_nodes.empty()) {
948 return results;
949 }
950
951 // Stack-based traversal to avoid recursion
952 std::vector<uint> node_stack;
953 node_stack.push_back(0); // Start with root node
954
955 while (!node_stack.empty()) {
956 uint node_idx = node_stack.back();
957 node_stack.pop_back();
958
959 if (node_idx >= bvh_nodes.size())
960 continue;
961
962 const BVHNode &node = bvh_nodes[node_idx];
963
964 // Test if query AABB intersects node AABB
965 if (!aabbIntersect(query_aabb_min, query_aabb_max, node.aabb_min, node.aabb_max)) {
966 continue;
967 }
968
969 if (node.is_leaf) {
970 // Check each primitive in this leaf for intersection
971 for (uint i = 0; i < node.primitive_count; i++) {
972 uint primitive_id = primitive_indices[node.primitive_start + i];
973
974 // Get this primitive's AABB
975 if (!context->doesPrimitiveExist(primitive_id)) {
976 continue; // Skip invalid primitive
977 }
978 vec3 prim_min, prim_max;
979 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
980
981 // Only add to results if AABBs actually intersect
982 if (aabbIntersect(query_aabb_min, query_aabb_max, prim_min, prim_max)) {
983 results.push_back(primitive_id);
984 }
985 }
986 } else {
987 // Add child nodes to stack for processing
988 if (node.left_child != 0xFFFFFFFF) {
989 node_stack.push_back(node.left_child);
990 }
991 if (node.right_child != 0xFFFFFFFF) {
992 node_stack.push_back(node.right_child);
993 }
994 }
995 }
996
997 return results;
998}
999
1000#ifdef HELIOS_CUDA_AVAILABLE
1001std::vector<uint> CollisionDetection::traverseBVH_GPU(const vec3 &query_aabb_min, const vec3 &query_aabb_max) {
1002 if (!gpu_memory_allocated) {
1003 helios_runtime_error("ERROR: GPU traversal requested but GPU memory is not allocated. Call buildBVH() or transferBVHToGPU() first.");
1004 }
1005
1006 // Prepare single query
1007 float query_min_array[3] = {query_aabb_min.x, query_aabb_min.y, query_aabb_min.z};
1008 float query_max_array[3] = {query_aabb_max.x, query_aabb_max.y, query_aabb_max.z};
1009
1010 // Prepare primitive AABB arrays
1011 std::vector<float> primitive_min_array(primitive_indices.size() * 3);
1012 std::vector<float> primitive_max_array(primitive_indices.size() * 3);
1013
1014 for (size_t i = 0; i < primitive_indices.size(); i++) {
1015 uint uuid = primitive_indices[i];
1016 auto it = primitive_aabbs_cache.find(uuid);
1017 if (it == primitive_aabbs_cache.end()) {
1018 continue; // Skip missing primitive
1019 }
1020 const auto &cached_aabb = it->second;
1021
1022 primitive_min_array[i * 3] = cached_aabb.first.x;
1023 primitive_min_array[i * 3 + 1] = cached_aabb.first.y;
1024 primitive_min_array[i * 3 + 2] = cached_aabb.first.z;
1025
1026 primitive_max_array[i * 3] = cached_aabb.second.x;
1027 primitive_max_array[i * 3 + 1] = cached_aabb.second.y;
1028 primitive_max_array[i * 3 + 2] = cached_aabb.second.z;
1029 }
1030
1031 const int max_results = 1000; // Reasonable limit
1032 std::vector<unsigned int> results(max_results);
1033 unsigned int result_count = 0;
1034
1035 // Call CUDA kernel wrapper
1036 launchBVHTraversal(d_bvh_nodes, bvh_nodes.size(), d_primitive_indices, primitive_indices.size(), primitive_min_array.data(), primitive_max_array.data(), query_min_array, query_max_array, 1, results.data(), &result_count, max_results);
1037
1038 // Convert to return format
1039 std::vector<uint> final_results;
1040 for (unsigned int i = 0; i < result_count; i++) {
1041 final_results.push_back(results[i]);
1042 }
1043
1044 return final_results;
1045}
1046#endif
1047
1048bool CollisionDetection::aabbIntersect(const vec3 &min1, const vec3 &max1, const vec3 &min2, const vec3 &max2) {
1049 return (min1.x <= max2.x && max1.x >= min2.x) && (min1.y <= max2.y && max1.y >= min2.y) && (min1.z <= max2.z && max1.z >= min2.z);
1050}
1051
1052bool CollisionDetection::rayAABBIntersect(const vec3 &origin, const vec3 &direction, const vec3 &aabb_min, const vec3 &aabb_max, float &t_min, float &t_max) const {
1053
1054 t_min = 0.0f;
1055 t_max = std::numeric_limits<float>::max();
1056
1057 // Check intersection with each pair of parallel planes (X, Y, Z)
1058 for (int i = 0; i < 3; i++) {
1059 float dir_component = (i == 0) ? direction.x : (i == 1) ? direction.y : direction.z;
1060 float orig_component = (i == 0) ? origin.x : (i == 1) ? origin.y : origin.z;
1061 float min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
1062 float max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
1063
1064 if (std::abs(dir_component) < 1e-9f) {
1065 // Ray is parallel to the slab
1066 if (orig_component < min_component || orig_component > max_component) {
1067 return false; // Ray is outside the slab and parallel to it
1068 }
1069 } else {
1070 // Compute intersection t values with the two planes
1071 float t1 = (min_component - orig_component) / dir_component;
1072 float t2 = (max_component - orig_component) / dir_component;
1073
1074 // Make sure t1 is the near intersection and t2 is the far intersection
1075 if (t1 > t2) {
1076 std::swap(t1, t2);
1077 }
1078
1079 // Update the overall near and far intersection parameters
1080 t_min = std::max(t_min, t1);
1081 t_max = std::min(t_max, t2);
1082
1083 // If the near intersection is farther than the far intersection, no intersection
1084 if (t_min > t_max) {
1085 return false;
1086 }
1087 }
1088 }
1089
1090 // Ray intersects AABB if t_min <= t_max and the intersection is in front of ray origin
1091 return t_max >= 0.0f;
1092}
1093
1094bool CollisionDetection::coneAABBIntersect(const Cone &cone, const vec3 &aabb_min, const vec3 &aabb_max) {
1095 // Check if apex is inside AABB
1096 if (cone.apex.x >= aabb_min.x && cone.apex.x <= aabb_max.x && cone.apex.y >= aabb_min.y && cone.apex.y <= aabb_max.y && cone.apex.z >= aabb_min.z && cone.apex.z <= aabb_max.z) {
1097 return true; // Apex is inside AABB, definite intersection
1098 }
1099
1100 // Early rejection using bounding sphere around AABB
1101 vec3 box_center = 0.5f * (aabb_min + aabb_max);
1102 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1103 float box_radius = box_half_extents.magnitude();
1104
1105 // For infinite cone, check if any AABB corner is inside the cone
1106 if (cone.height <= 0.0f) {
1107 // Check all 8 corners of the AABB
1108 for (int i = 0; i < 8; i++) {
1109 vec3 corner = make_vec3((i & 1) ? aabb_max.x : aabb_min.x, (i & 2) ? aabb_max.y : aabb_min.y, (i & 4) ? aabb_max.z : aabb_min.z);
1110
1111 // Vector from apex to corner
1112 vec3 apex_to_corner = corner - cone.apex;
1113 float distance_along_axis = apex_to_corner * cone.axis;
1114
1115 // Point must be in front of apex
1116 if (distance_along_axis > 0) {
1117 // Check if point is within cone angle
1118 float cos_angle = distance_along_axis / apex_to_corner.magnitude();
1119 if (cos_angle >= cosf(cone.half_angle)) {
1120 return true; // This corner is inside the cone
1121 }
1122 }
1123 }
1124
1125 // Check if cone axis intersects AABB
1126 // Use ray-AABB intersection with cone axis as ray
1127 float t_min = 0.0f;
1128 float t_max = std::numeric_limits<float>::max();
1129
1130 for (int i = 0; i < 3; i++) {
1131 float axis_component = (i == 0) ? cone.axis.x : (i == 1) ? cone.axis.y : cone.axis.z;
1132 float apex_component = (i == 0) ? cone.apex.x : (i == 1) ? cone.apex.y : cone.apex.z;
1133 float min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
1134 float max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
1135
1136 if (std::abs(axis_component) < 1e-6f) {
1137 // Ray is parallel to slab
1138 if (apex_component < min_component || apex_component > max_component) {
1139 return false; // Ray is outside slab
1140 }
1141 } else {
1142 // Compute intersection t values
1143 float t1 = (min_component - apex_component) / axis_component;
1144 float t2 = (max_component - apex_component) / axis_component;
1145
1146 if (t1 > t2)
1147 std::swap(t1, t2);
1148
1149 t_min = std::max(t_min, t1);
1150 t_max = std::min(t_max, t2);
1151
1152 if (t_min > t_max) {
1153 return false; // No intersection
1154 }
1155 }
1156 }
1157
1158 // If we get here, the cone axis intersects the AABB
1159 // But for narrow cones, we need to check if the AABB is within the cone angle
1160 if (t_min >= 0 && t_max >= 0) {
1161 // Check if the intersection region on the axis is within the cone angle
1162 // Find the closest point on the axis within the intersection region
1163 float t_check = std::max(0.0f, t_min);
1164 vec3 axis_point = cone.apex + cone.axis * t_check;
1165
1166 // Find the closest point in the AABB to this axis point
1167 vec3 closest_in_box = make_vec3(std::max(aabb_min.x, std::min(axis_point.x, aabb_max.x)), std::max(aabb_min.y, std::min(axis_point.y, aabb_max.y)), std::max(aabb_min.z, std::min(axis_point.z, aabb_max.z)));
1168
1169 // Check if this closest point is within the cone
1170 vec3 apex_to_point = closest_in_box - cone.apex;
1171 float distance_along_axis = apex_to_point * cone.axis;
1172
1173 if (distance_along_axis > 0) {
1174 float distance_to_point = apex_to_point.magnitude();
1175 if (distance_to_point > 0) {
1176 float cos_angle = distance_along_axis / distance_to_point;
1177 if (cos_angle >= cosf(cone.half_angle)) {
1178 return true;
1179 }
1180 }
1181 }
1182 }
1183 } else {
1184 // Finite cone case
1185 // Check if any AABB corner is inside the finite cone
1186 for (int i = 0; i < 8; i++) {
1187 vec3 corner = make_vec3((i & 1) ? aabb_max.x : aabb_min.x, (i & 2) ? aabb_max.y : aabb_min.y, (i & 4) ? aabb_max.z : aabb_min.z);
1188
1189 // Vector from apex to corner
1190 vec3 apex_to_corner = corner - cone.apex;
1191 float distance_along_axis = apex_to_corner * cone.axis;
1192
1193 // Check if point is within cone height and in front of apex
1194 if (distance_along_axis > 0 && distance_along_axis <= cone.height) {
1195 // Check if point is within cone angle
1196 float cos_angle = distance_along_axis / apex_to_corner.magnitude();
1197 if (cos_angle >= cosf(cone.half_angle)) {
1198 return true; // This corner is inside the cone
1199 }
1200 }
1201 }
1202
1203 // Also need to check if cone base intersects AABB
1204 vec3 base_center = cone.apex + cone.axis * cone.height;
1205 float base_radius = cone.height * tanf(cone.half_angle);
1206
1207 // Simple sphere-AABB check for cone base
1208 vec3 closest_point = make_vec3(std::max(aabb_min.x, std::min(base_center.x, aabb_max.x)), std::max(aabb_min.y, std::min(base_center.y, aabb_max.y)), std::max(aabb_min.z, std::min(base_center.z, aabb_max.z)));
1209
1210 float dist_sq = (closest_point - base_center).magnitude();
1211 if (dist_sq <= base_radius) {
1212 return true; // Base circle intersects AABB
1213 }
1214 }
1215
1216 // More sophisticated tests could be added here for edge cases
1217 // For now, return false if no intersection found
1218 return false;
1219}
1220
1221bool CollisionDetection::coneAABBIntersectFast(const Cone &cone, const vec3 &aabb_min, const vec3 &aabb_max) {
1222 // Fast-path cone-AABB intersection optimized for high-throughput filtering
1223 // Uses aggressive early rejection to eliminate 99%+ of geometry with minimal computation
1224
1225 // Fast rejection test 1: Check if apex is inside AABB (very common case, worth checking first)
1226 if (cone.apex.x >= aabb_min.x && cone.apex.x <= aabb_max.x && cone.apex.y >= aabb_min.y && cone.apex.y <= aabb_max.y && cone.apex.z >= aabb_min.z && cone.apex.z <= aabb_max.z) {
1227 return true; // Apex inside AABB - definite intersection
1228 }
1229
1230 // Fast rejection test 2: Behind-apex test (eliminates geometry behind cone)
1231 vec3 box_center = 0.5f * (aabb_min + aabb_max);
1232 vec3 apex_to_center = box_center - cone.apex;
1233 float distance_along_axis = apex_to_center * cone.axis;
1234
1235 if (distance_along_axis <= 0.0f) {
1236 return false; // AABB is completely behind cone apex
1237 }
1238
1239 // Fast rejection test 3: Height test for finite cones
1240 if (cone.height > 0.0f && distance_along_axis > cone.height) {
1241 // Box center is beyond cone height, but we need to check if any part of box is within height
1242 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1243 float box_radius = box_half_extents.magnitude();
1244 if (distance_along_axis - box_radius > cone.height) {
1245 return false; // Entire AABB is beyond cone height
1246 }
1247 }
1248
1249 // Fast rejection test 4: Cone angle bounding sphere test (fast approximation)
1250 float max_distance = (cone.height > 0.0f) ? cone.height : distance_along_axis;
1251 float max_radius_at_distance = max_distance * tanf(cone.half_angle);
1252
1253 // Find closest point on cone axis to box center
1254 vec3 axis_point = cone.apex + cone.axis * distance_along_axis;
1255 float distance_from_axis = (box_center - axis_point).magnitude();
1256
1257 // Conservative bounding sphere test
1258 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1259 float box_radius = box_half_extents.magnitude();
1260
1261 if (distance_from_axis > max_radius_at_distance + box_radius) {
1262 return false; // AABB is completely outside cone's maximum radius
1263 }
1264
1265 // If we get here, AABB might intersect cone - fall back to precise test
1266 // This should only happen for a small percentage of AABBs
1267 return coneAABBIntersect(cone, aabb_min, aabb_max);
1268}
1269
1270// -------- RASTERIZATION-BASED COLLISION DETECTION IMPLEMENTATION --------
1271
1272int CollisionDetection::calculateOptimalBinCount(float cone_half_angle, int geometry_count) {
1273 // Target: ~1 degree angular resolution, scaled by cone size
1274 float base_resolution = M_PI / 180.0f; // 1 degree in radians
1275 float cone_solid_angle = 2.0f * M_PI * (1.0f - cosf(cone_half_angle));
1276 int optimal_bins = (int) (cone_solid_angle / (base_resolution * base_resolution));
1277
1278 // Clamp based on geometry complexity and performance
1279 int min_bins = 64; // Always enough resolution for gap detection
1280 int max_bins = std::min(1024, geometry_count * 4); // Don't exceed geometry complexity
1281
1282 return std::clamp(optimal_bins, min_bins, max_bins);
1283}
1284
1285std::vector<uint> CollisionDetection::filterPrimitivesParallel(const Cone &cone, const std::vector<uint> &primitive_uuids) {
1286 std::vector<uint> filtered_uuids;
1287
1288 if (primitive_uuids.empty()) {
1289 return filtered_uuids;
1290 }
1291
1292 // Reserve space for result vector
1293 filtered_uuids.reserve(primitive_uuids.size() / 10); // Estimate ~10% will pass filter
1294
1295#ifdef _OPENMP
1296 // Use thread-local vectors to avoid synchronization overhead
1297 const int num_threads = omp_get_max_threads();
1298 std::vector<std::vector<uint>> thread_results(num_threads);
1299
1300 // Pre-allocate thread-local storage
1301 for (int i = 0; i < num_threads; i++) {
1302 thread_results[i].reserve(primitive_uuids.size() / (num_threads * 10));
1303 }
1304
1305// Always parallelize AABB filtering (high value, low overhead)
1306#pragma omp parallel
1307 {
1308 int thread_id = omp_get_thread_num();
1309 std::vector<uint> &local_results = thread_results[thread_id];
1310
1311#pragma omp for nowait
1312 for (int i = 0; i < static_cast<int>(primitive_uuids.size()); i++) {
1313 uint uuid = primitive_uuids[i];
1314
1315 // Get primitive vertices and calculate AABB
1316 if (context->doesPrimitiveExist(uuid)) {
1317 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1318 if (!vertices.empty()) {
1319 // Calculate AABB from vertices
1320 vec3 aabb_min = vertices[0];
1321 vec3 aabb_max = vertices[0];
1322 for (const vec3 &vertex: vertices) {
1323 aabb_min = make_vec3(std::min(aabb_min.x, vertex.x), std::min(aabb_min.y, vertex.y), std::min(aabb_min.z, vertex.z));
1324 aabb_max = make_vec3(std::max(aabb_max.x, vertex.x), std::max(aabb_max.y, vertex.y), std::max(aabb_max.z, vertex.z));
1325 }
1326
1327 // Use fast cone-AABB intersection test
1328 if (coneAABBIntersectFast(cone, aabb_min, aabb_max)) {
1329 local_results.push_back(uuid);
1330 }
1331 }
1332 }
1333 }
1334 }
1335
1336 // Merge results from all threads
1337 size_t total_count = 0;
1338 for (const auto &thread_result: thread_results) {
1339 total_count += thread_result.size();
1340 }
1341
1342 filtered_uuids.reserve(total_count);
1343 for (const auto &thread_result: thread_results) {
1344 filtered_uuids.insert(filtered_uuids.end(), thread_result.begin(), thread_result.end());
1345 }
1346#else
1347 // Serial fallback when OpenMP is not available
1348 for (size_t i = 0; i < primitive_uuids.size(); i++) {
1349 uint uuid = primitive_uuids[i];
1350
1351 // Get primitive vertices and calculate AABB
1352 if (context->doesPrimitiveExist(uuid)) {
1353 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1354 if (!vertices.empty()) {
1355 // Calculate AABB from vertices
1356 vec3 aabb_min = vertices[0];
1357 vec3 aabb_max = vertices[0];
1358 for (const vec3 &vertex: vertices) {
1359 aabb_min = make_vec3(std::min(aabb_min.x, vertex.x), std::min(aabb_min.y, vertex.y), std::min(aabb_min.z, vertex.z));
1360 aabb_max = make_vec3(std::max(aabb_max.x, vertex.x), std::max(aabb_max.y, vertex.y), std::max(aabb_max.z, vertex.z));
1361 }
1362
1363 // Use fast cone-AABB intersection test
1364 if (coneAABBIntersectFast(cone, aabb_min, aabb_max)) {
1365 filtered_uuids.push_back(uuid);
1366 }
1367 }
1368 }
1369 }
1370#endif
1371
1372 return filtered_uuids;
1373}
1374
1375float CollisionDetection::cartesianToSphericalCone(const vec3 &vector, const vec3 &cone_axis, float &theta, float &phi) {
1376 float distance = vector.magnitude();
1377 if (distance < 1e-6f) {
1378 theta = 0.0f;
1379 phi = 0.0f;
1380 return 0.0f;
1381 }
1382
1383 vec3 normalized_vector = vector / distance;
1384
1385 // Calculate phi (polar angle from cone axis)
1386 float cos_phi = normalized_vector * cone_axis;
1387 phi = acosf(std::clamp(cos_phi, -1.0f, 1.0f));
1388
1389 // Calculate theta (azimuthal angle around cone axis)
1390 // Create orthonormal basis from cone axis
1391 vec3 up = (abs(cone_axis.z) < 0.999f) ? make_vec3(0, 0, 1) : make_vec3(1, 0, 0);
1392 vec3 right = cross(cone_axis, up);
1393 right.normalize();
1394 vec3 forward = cross(right, cone_axis);
1395
1396 // Project vector onto perpendicular plane
1397 vec3 projected = normalized_vector - cone_axis * cos_phi;
1398 if (projected.magnitude() > 1e-6f) {
1399 projected.normalize();
1400 float cos_theta = projected * right;
1401 float sin_theta = projected * forward;
1402 theta = atan2f(sin_theta, cos_theta);
1403 if (theta < 0.0f)
1404 theta += 2.0f * M_PI; // Ensure [0, 2π]
1405 } else {
1406 theta = 0.0f; // Vector is along cone axis
1407 }
1408
1409 return distance;
1410}
1411
1412bool CollisionDetection::sphericalCoordsToBinIndices(float theta, float phi, const AngularBins &bins, int &theta_bin, int &phi_bin) {
1413 // Check if angles are within valid cone range
1414 if (phi < 0.0f || theta < 0.0f || theta >= 2.0f * M_PI) {
1415 return false;
1416 }
1417
1418 // Map to bin indices
1419 theta_bin = (int) (theta * bins.theta_divisions / (2.0f * M_PI));
1420 phi_bin = (int) (phi * bins.phi_divisions / M_PI); // Assume phi range is [0, PI] for full sphere
1421
1422 // Clamp to valid ranges
1423 theta_bin = std::clamp(theta_bin, 0, bins.theta_divisions - 1);
1424 phi_bin = std::clamp(phi_bin, 0, bins.phi_divisions - 1);
1425
1426 return true;
1427}
1428
1429void CollisionDetection::projectGeometryToBins(const Cone &cone, const std::vector<uint> &filtered_uuids, AngularBins &bins) {
1430 bins.clear();
1431
1432 const int PARALLEL_THRESHOLD = 500; // Empirically determined threshold
1433
1434 if (filtered_uuids.size() > PARALLEL_THRESHOLD) {
1435// Conditional parallel projection for large geometry sets
1436#ifdef _OPENMP
1437 const int num_threads = omp_get_max_threads();
1438 std::vector<AngularBins> thread_bins(num_threads, AngularBins(bins.theta_divisions, bins.phi_divisions));
1439
1440#pragma omp parallel
1441 {
1442 int thread_id = omp_get_thread_num();
1443 AngularBins &local_bins = thread_bins[thread_id];
1444
1445#pragma omp for nowait
1446 for (int i = 0; i < static_cast<int>(filtered_uuids.size()); i++) {
1447 uint uuid = filtered_uuids[i];
1448
1449 if (context->doesPrimitiveExist(uuid)) {
1450 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1451
1452 // Project each vertex to spherical coordinates
1453 for (const vec3 &vertex: vertices) {
1454 vec3 apex_to_vertex = vertex - cone.apex;
1455 float distance = apex_to_vertex.magnitude();
1456
1457 if (distance > 1e-6f) {
1458 float theta, phi;
1459 cartesianToSphericalCone(apex_to_vertex, cone.axis, theta, phi);
1460
1461 // Skip if outside cone angle
1462 if (phi <= cone.half_angle) {
1463 int theta_bin, phi_bin;
1464 if (sphericalCoordsToBinIndices(theta, phi, local_bins, theta_bin, phi_bin)) {
1465 local_bins.setCovered(theta_bin, phi_bin, distance);
1466 }
1467 }
1468 }
1469 }
1470 }
1471 }
1472 }
1473
1474 // Merge thread-local bins into global bins
1475 for (const auto &thread_bin: thread_bins) {
1476 for (int theta = 0; theta < bins.theta_divisions; theta++) {
1477 for (int phi = 0; phi < bins.phi_divisions; phi++) {
1478 if (thread_bin.isCovered(theta, phi)) {
1479 int index = theta * bins.phi_divisions + phi;
1480 float thread_depth = thread_bin.depth_values[index];
1481 bins.setCovered(theta, phi, thread_depth);
1482 }
1483 }
1484 }
1485 }
1486#else
1487 // Fallback to serial if OpenMP not available
1488 projectGeometryToBinsSerial(cone, filtered_uuids, bins);
1489#endif
1490 } else {
1491 // Serial projection for small geometry sets
1492 projectGeometryToBinsSerial(cone, filtered_uuids, bins);
1493 }
1494}
1495
1496void CollisionDetection::projectGeometryToBinsSerial(const Cone &cone, const std::vector<uint> &filtered_uuids, AngularBins &bins) {
1497 for (uint uuid: filtered_uuids) {
1498 if (context->doesPrimitiveExist(uuid)) {
1499 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1500
1501 // Project each vertex to spherical coordinates
1502 for (const vec3 &vertex: vertices) {
1503 vec3 apex_to_vertex = vertex - cone.apex;
1504 float distance = apex_to_vertex.magnitude();
1505
1506 if (distance > 1e-6f) {
1507 float theta, phi;
1508 cartesianToSphericalCone(apex_to_vertex, cone.axis, theta, phi);
1509
1510 // Skip if outside cone angle
1511 if (phi <= cone.half_angle) {
1512 int theta_bin, phi_bin;
1513 if (sphericalCoordsToBinIndices(theta, phi, bins, theta_bin, phi_bin)) {
1514 bins.setCovered(theta_bin, phi_bin, distance);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 }
1521}
1522
1523std::vector<CollisionDetection::Gap> CollisionDetection::findGapsInCoverageMap(const AngularBins &bins, const Cone &cone) {
1524 std::vector<Gap> gaps;
1525
1526 // Connected component analysis to find contiguous free regions
1527 std::vector<std::vector<bool>> visited(bins.theta_divisions, std::vector<bool>(bins.phi_divisions, false));
1528
1529 for (int theta = 0; theta < bins.theta_divisions; theta++) {
1530 for (int phi = 0; phi < bins.phi_divisions; phi++) {
1531 if (!bins.isCovered(theta, phi) && !visited[theta][phi]) {
1532 // Found start of new gap - flood fill to find full extent
1533 Gap gap = floodFillGap(bins, theta, phi, visited, cone);
1534
1535 // Only keep gaps meeting minimum size thresholds
1536 const float MIN_GAP_SIZE_STERADIANS = 0.01f; // Minimum gap size
1537 if (gap.angular_size > MIN_GAP_SIZE_STERADIANS) {
1538 gaps.push_back(gap);
1539 }
1540 }
1541 }
1542 }
1543
1544 // Sort gaps by angular size (largest first)
1545 std::sort(gaps.begin(), gaps.end(), [](const Gap &a, const Gap &b) { return a.angular_size > b.angular_size; });
1546
1547 return gaps;
1548}
1549
1550CollisionDetection::Gap CollisionDetection::floodFillGap(const AngularBins &bins, int start_theta, int start_phi, std::vector<std::vector<bool>> &visited, const Cone &cone) {
1551 Gap gap;
1552 std::queue<std::pair<int, int>> queue;
1553 queue.push({start_theta, start_phi});
1554
1555 float total_solid_angle = 0;
1556 vec3 weighted_center(0, 0, 0);
1557
1558 while (!queue.empty()) {
1559 auto [theta, phi] = queue.front();
1560 queue.pop();
1561
1562 if (visited[theta][phi] || bins.isCovered(theta, phi))
1563 continue;
1564 visited[theta][phi] = true;
1565
1566 // Calculate solid angle contribution of this bin
1567 float bin_solid_angle = calculateBinSolidAngle(theta, phi, bins, cone.half_angle);
1568 total_solid_angle += bin_solid_angle;
1569
1570 // Accumulate weighted center direction
1571 vec3 bin_direction = binIndicesToCartesian(theta, phi, bins, cone);
1572 weighted_center = weighted_center + bin_direction * bin_solid_angle;
1573
1574 // Add unoccupied neighbors to queue
1575 addUnoccupiedNeighbors(theta, phi, bins, visited, queue);
1576 }
1577
1578 gap.angular_size = total_solid_angle;
1579 gap.center_direction = weighted_center.magnitude() > 1e-6f ? weighted_center.normalize() : cone.axis;
1580
1581 return gap;
1582}
1583
1584float CollisionDetection::calculateBinSolidAngle(int theta_bin, int phi_bin, const AngularBins &bins, float cone_half_angle) {
1585 // Calculate solid angle of a single bin
1586 float theta_step = 2.0f * M_PI / bins.theta_divisions;
1587 float phi_step = cone_half_angle / bins.phi_divisions;
1588
1589 // For small angles, solid angle ≈ θ_step × φ_step × sin(φ)
1590 float phi = (phi_bin + 0.5f) * phi_step;
1591 return theta_step * phi_step * sinf(phi);
1592}
1593
1594vec3 CollisionDetection::binIndicesToCartesian(int theta_bin, int phi_bin, const AngularBins &bins, const Cone &cone) {
1595 // Convert bin indices back to cartesian direction
1596 float theta = (theta_bin + 0.5f) * 2.0f * M_PI / bins.theta_divisions;
1597 float phi = (phi_bin + 0.5f) * cone.half_angle / bins.phi_divisions;
1598
1599 // Create orthonormal basis from cone axis (same as in cartesianToSphericalCone)
1600 vec3 up = (abs(cone.axis.z) < 0.999f) ? make_vec3(0, 0, 1) : make_vec3(1, 0, 0);
1601 vec3 right = cross(cone.axis, up);
1602 right.normalize();
1603 vec3 forward = cross(right, cone.axis);
1604
1605 // Convert spherical to cartesian
1606 float sin_phi = sinf(phi);
1607 float cos_phi = cosf(phi);
1608 float sin_theta = sinf(theta);
1609 float cos_theta = cosf(theta);
1610
1611 vec3 direction = cone.axis * cos_phi + (right * cos_theta + forward * sin_theta) * sin_phi;
1612 return direction.normalize();
1613}
1614
1615void CollisionDetection::addUnoccupiedNeighbors(int theta, int phi, const AngularBins &bins, std::vector<std::vector<bool>> &visited, std::queue<std::pair<int, int>> &queue) {
1616 // Add 4-connected neighbors (up, down, left, right in bin space)
1617 const int neighbors[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
1618
1619 for (int i = 0; i < 4; i++) {
1620 int new_theta = theta + neighbors[i][0];
1621 int new_phi = phi + neighbors[i][1];
1622
1623 // Handle theta wraparound (circular)
1624 if (new_theta < 0)
1625 new_theta += bins.theta_divisions;
1626 if (new_theta >= bins.theta_divisions)
1627 new_theta -= bins.theta_divisions;
1628
1629 // Check phi bounds (no wraparound)
1630 if (new_phi >= 0 && new_phi < bins.phi_divisions) {
1631 if (!visited[new_theta][new_phi] && !bins.isCovered(new_theta, new_phi)) {
1632 queue.push({new_theta, new_phi});
1633 }
1634 }
1635 }
1636}
1637
1638// -------- MAIN RASTERIZED FINDOPTIMALCONEPATH IMPLEMENTATION --------
1639
1640CollisionDetection::OptimalPathResult CollisionDetection::findOptimalConePath(const vec3 &apex, const vec3 &centralAxis, float half_angle, float height, int initialSamples) {
1641
1642 OptimalPathResult result;
1643 result.direction = centralAxis;
1644 result.direction.normalize();
1645 result.collisionCount = 0;
1646 result.confidence = 0.0f;
1647
1648 // Validate input parameters
1649 if (initialSamples <= 0 || half_angle <= 0.0f || half_angle > M_PI) {
1650 if (printmessages) {
1651 std::cerr << "WARNING: Invalid parameters for findOptimalConePath" << std::endl;
1652 }
1653 return result;
1654 }
1655
1656 if (bvh_nodes.empty()) {
1657 // No geometry to collide with, central axis is optimal
1658 result.confidence = 1.0f;
1659 return result;
1660 }
1661
1662 // Use original fish-eye camera gap detection algorithm
1663 std::vector<Gap> detected_gaps = detectGapsInCone(apex, centralAxis, half_angle, height, initialSamples);
1664
1665 if (detected_gaps.empty()) {
1666 // No gaps found, fall back to central axis
1667 // if (printmessages) {
1668 // std::cerr << "WARNING: No gaps detected in cone, using central axis" << std::endl;
1669 // }
1670 result.confidence = 0.1f;
1671 return result;
1672 }
1673
1674 // Score gaps using fish-eye metric
1675 scoreGapsByFishEyeMetric(detected_gaps, centralAxis);
1676
1677 // Find optimal direction toward highest-scoring gap
1678 result.direction = findOptimalGapDirection(detected_gaps, centralAxis);
1679
1680 // Count collisions along optimal direction for reporting using modern ray-tracing
1681 float max_distance = (height > 0.0f) ? height : -1.0f;
1682 HitResult direction_hit = castRay(apex, result.direction, max_distance);
1683 result.collisionCount = direction_hit.hit ? 1 : 0;
1684
1685 // Calculate confidence based on gap quality
1686 if (!detected_gaps.empty()) {
1687 // Higher confidence for larger, well-defined gaps
1688 const Gap &best_gap = detected_gaps[0]; // Assuming first is best after sorting
1689 result.confidence = std::min(1.0f, best_gap.angular_size * 10.0f); // Scale angular size to confidence
1690 }
1691
1692 return result;
1693}
1694
1695#ifdef HELIOS_CUDA_AVAILABLE
1696void CollisionDetection::allocateGPUMemory() {
1697 if (gpu_memory_allocated) {
1698 freeGPUMemory(); // Clean up existing allocation
1699 }
1700
1701 if (bvh_nodes.empty() || primitive_indices.empty()) {
1702 return; // Nothing to allocate
1703 }
1704
1705 // Initialize pointers to nullptr for safety
1706 d_bvh_nodes = nullptr;
1707 d_primitive_indices = nullptr;
1708
1709 // Calculate sizes
1710 size_t bvh_size = bvh_nodes.size() * sizeof(GPUBVHNode);
1711 size_t indices_size = primitive_indices.size() * sizeof(uint);
1712
1713 // Validate sizes are reasonable
1714 if (bvh_size == 0 || indices_size == 0) {
1715 helios_runtime_error("ERROR: Invalid BVH or primitive data sizes for GPU allocation");
1716 }
1717
1718 // Allocate BVH nodes
1719 cudaError_t err = cudaMalloc(&d_bvh_nodes, bvh_size);
1720 if (err != cudaSuccess) {
1721 helios_runtime_error("CUDA error allocating BVH nodes: " + std::string(cudaGetErrorString(err)));
1722 }
1723
1724 // Allocate primitive indices
1725 err = cudaMalloc((void **) &d_primitive_indices, indices_size);
1726 if (err != cudaSuccess) {
1727 cudaFree(d_bvh_nodes);
1728 d_bvh_nodes = nullptr;
1729 helios_runtime_error("CUDA error allocating primitive indices: " + std::string(cudaGetErrorString(err)));
1730 }
1731
1732 // Mark as allocated only after both allocations succeeded
1733 gpu_memory_allocated = true;
1734}
1735#endif
1736
1737#ifdef HELIOS_CUDA_AVAILABLE
1738void CollisionDetection::freeGPUMemory() {
1739 if (!gpu_memory_allocated)
1740 return;
1741
1742 if (d_bvh_nodes) {
1743 cudaFree(d_bvh_nodes);
1744 d_bvh_nodes = nullptr;
1745 }
1746
1747 if (d_primitive_indices) {
1748 cudaFree(d_primitive_indices);
1749 d_primitive_indices = nullptr;
1750 }
1751
1752 gpu_memory_allocated = false;
1753}
1754#endif
1755
1756#ifdef HELIOS_CUDA_AVAILABLE
1757void CollisionDetection::transferBVHToGPU() {
1758 if (!gpu_acceleration_enabled || bvh_nodes.empty()) {
1759 return;
1760 }
1761
1762 // Always reallocate GPU memory to handle size changes
1763 if (gpu_memory_allocated) {
1764 freeGPUMemory();
1765 }
1766 allocateGPUMemory();
1767
1768 // Verify allocation succeeded
1769 if (!gpu_memory_allocated || d_bvh_nodes == nullptr || d_primitive_indices == nullptr) {
1770 helios_runtime_error("ERROR: Failed to allocate GPU memory for BVH transfer");
1771 }
1772
1773 // Convert CPU BVH to GPU format
1774 std::vector<GPUBVHNode> gpu_nodes(bvh_nodes.size());
1775 for (size_t i = 0; i < bvh_nodes.size(); i++) {
1776 const BVHNode &cpu_node = bvh_nodes[i];
1777 GPUBVHNode &gpu_node = gpu_nodes[i];
1778
1779 gpu_node.aabb_min = heliosVecToFloat3(cpu_node.aabb_min);
1780 gpu_node.aabb_max = heliosVecToFloat3(cpu_node.aabb_max);
1781 gpu_node.left_child = cpu_node.left_child;
1782 gpu_node.right_child = cpu_node.right_child;
1783 gpu_node.primitive_start = cpu_node.primitive_start;
1784 gpu_node.primitive_count = cpu_node.primitive_count;
1785 gpu_node.is_leaf = cpu_node.is_leaf ? 1 : 0;
1786 gpu_node.padding = 0;
1787 }
1788
1789 // Transfer to GPU
1790 cudaError_t err = cudaMemcpy(d_bvh_nodes, gpu_nodes.data(), gpu_nodes.size() * sizeof(GPUBVHNode), cudaMemcpyHostToDevice);
1791 if (err != cudaSuccess) {
1792 helios_runtime_error("CUDA error transferring BVH nodes: " + std::string(cudaGetErrorString(err)));
1793 }
1794
1795 err = cudaMemcpy(d_primitive_indices, primitive_indices.data(), primitive_indices.size() * sizeof(uint), cudaMemcpyHostToDevice);
1796 if (err != cudaSuccess) {
1797 helios_runtime_error("CUDA error transferring primitive indices: " + std::string(cudaGetErrorString(err)));
1798 }
1799}
1800#endif
1801
1802void CollisionDetection::markBVHDirty() {
1803 // Clear internal tracking so BVH will be rebuilt on next access
1804 last_processed_uuids.clear();
1805 last_processed_deleted_uuids.clear();
1806 last_bvh_geometry.clear();
1807 bvh_dirty = true;
1808
1809 // Note: Don't clear primitive_cache here - it will be cleared only when
1810 // buildBVH() detects actual primitive set changes, not just geometry updates
1811
1812 // Free GPU memory since BVH will be rebuilt
1813#ifdef HELIOS_CUDA_AVAILABLE
1814 freeGPUMemory();
1815#endif
1816}
1817
1818void CollisionDetection::incrementalUpdateBVH(const std::set<uint> &added_geometry, const std::set<uint> &removed_geometry, const std::set<uint> &final_geometry) {
1819
1820 // Validate new geometries exist first
1821 for (uint uuid: added_geometry) {
1822 if (!context->doesPrimitiveExist(uuid)) {
1823 if (printmessages) {
1824 std::cerr << "Warning: Added primitive " << uuid << " does not exist, falling back to full rebuild" << std::endl;
1825 }
1826 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
1827 buildBVH(final_primitives);
1828 return;
1829 }
1830 }
1831
1832 // For plant growth scenarios, most changes are additions (new leaves/branches)
1833 // We can optimize for this by caching primitive AABBs and selective reconstruction
1834
1835 // Update primitive AABB cache for new primitives
1836 for (uint uuid: added_geometry) {
1837 updatePrimitiveAABBCache(uuid);
1838 }
1839
1840 // Remove old primitives from cache
1841 for (uint uuid: removed_geometry) {
1842 primitive_aabbs_cache.erase(uuid);
1843 }
1844
1845 // For incremental updates, we use a two-stage approach:
1846 // 1. If the number of changes is small relative to tree size, do targeted insertion
1847 // 2. Otherwise, do optimized rebuild with cached AABBs
1848
1849 size_t total_changes = added_geometry.size() + removed_geometry.size();
1850 size_t current_size = final_geometry.size();
1851
1852 // If changes are very small (<5% of tree), try targeted insertion
1853 if (current_size > 0 && !bvh_nodes.empty() && (float(total_changes) / float(current_size)) < 0.05f) {
1854 // For very small changes, targeted insertion can be beneficial
1855 bool insertion_successful = true;
1856
1857 // Remove primitives from primitive_indices
1858 if (!removed_geometry.empty()) {
1859 primitive_indices.erase(std::remove_if(primitive_indices.begin(), primitive_indices.end(), [&removed_geometry](uint uuid) { return removed_geometry.find(uuid) != removed_geometry.end(); }), primitive_indices.end());
1860 }
1861
1862 // Add new primitives to primitive_indices
1863 for (uint uuid: added_geometry) {
1864 primitive_indices.push_back(uuid);
1865 }
1866
1867 // For small changes, rebuild only affected subtrees by invalidating nodes
1868 // This is more efficient than full rebuild for tiny changes
1869 if (insertion_successful && total_changes < 50) {
1870 if (printmessages) {
1871 std::cout << "Using targeted tree update for " << total_changes << " changes" << std::endl;
1872 }
1873
1874 // Mark BVH as needing rebalance but keep existing structure where possible
1875 // For now, we do a fast rebuild since true incremental tree rebalancing
1876 // requires complex algorithms that may not be worth the complexity
1877 optimizedRebuildBVH(final_geometry);
1878 return;
1879 }
1880 }
1881
1882 // Fall back to optimized full rebuild using cached AABBs
1883 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
1884 buildBVH(final_primitives);
1885
1886 // Update tracking
1887 last_bvh_geometry = final_geometry;
1888 bvh_dirty = false;
1889 soa_dirty = true; // SoA needs rebuild after BVH change
1890}
1891
1892bool CollisionDetection::validateUUIDs(const std::vector<uint> &UUIDs) const {
1893 bool all_valid = true;
1894 for (uint UUID: UUIDs) {
1895 if (!context->doesPrimitiveExist(UUID)) {
1896 if (printmessages) {
1897 std::cerr << "WARNING (CollisionDetection::validateUUIDs): Primitive UUID " + std::to_string(UUID) + " does not exist - skipping" << std::endl;
1898 }
1899 all_valid = false;
1900 }
1901 }
1902 return all_valid;
1903}
1904
1905bool CollisionDetection::rayPrimitiveIntersection(const vec3 &origin, const vec3 &direction, uint primitive_UUID, float &distance) const {
1906 // Check if primitive exists first
1907 if (!context->doesPrimitiveExist(primitive_UUID)) {
1908 return false;
1909 }
1910
1911 try {
1912 // Get primitive type and vertices
1913 PrimitiveType type = context->getPrimitiveType(primitive_UUID);
1914 std::vector<vec3> vertices = context->getPrimitiveVertices(primitive_UUID);
1915
1916 if (vertices.empty()) {
1917 return false;
1918 }
1919
1920
1921 bool hit = false;
1922 float min_distance = std::numeric_limits<float>::max();
1923
1924 if (type == PRIMITIVE_TYPE_TRIANGLE) {
1925 // Triangle intersection using radiation model algorithm (proven to work)
1926 if (vertices.size() >= 3) {
1927 const vec3 &v0 = vertices[0];
1928 const vec3 &v1 = vertices[1];
1929 const vec3 &v2 = vertices[2];
1930
1931 // Use the same algorithm as radiation model's triangle_intersect
1932 float a = v0.x - v1.x, b = v0.x - v2.x, c = direction.x, d = v0.x - origin.x;
1933 float e = v0.y - v1.y, f = v0.y - v2.y, g = direction.y, h = v0.y - origin.y;
1934 float i = v0.z - v1.z, j = v0.z - v2.z, k = direction.z, l = v0.z - origin.z;
1935
1936 float m = f * k - g * j, n = h * k - g * l, p = f * l - h * j;
1937 float q = g * i - e * k, s = e * j - f * i;
1938
1939 float denom = a * m + b * q + c * s;
1940 if (std::abs(denom) < 1e-8f) {
1941 return false; // Ray is parallel to triangle
1942 }
1943
1944 float inv_denom = 1.0f / denom;
1945
1946 float e1 = d * m - b * n - c * p;
1947 float beta = e1 * inv_denom;
1948
1949 if (beta >= 0.0f) {
1950 float r = e * l - h * i;
1951 float e2 = a * n + d * q + c * r;
1952 float gamma = e2 * inv_denom;
1953
1954 if (gamma >= 0.0f && beta + gamma <= 1.0f) {
1955 float e3 = a * p - b * r + d * s;
1956 float t = e3 * inv_denom;
1957
1958 if (t > 1e-8f && t < min_distance) {
1959 min_distance = t;
1960 hit = true;
1961 }
1962 }
1963 }
1964 }
1965 } else if (type == PRIMITIVE_TYPE_PATCH) {
1966 // Patch (quadrilateral) intersection using radiation model algorithm
1967 if (vertices.size() >= 4) {
1968 const vec3 &v0 = vertices[0];
1969 const vec3 &v1 = vertices[1];
1970 const vec3 &v2 = vertices[2];
1971 const vec3 &v3 = vertices[3];
1972
1973 // Calculate patch vectors and normal (same as radiation model)
1974 vec3 anchor = v0;
1975 vec3 normal = cross(v1 - v0, v2 - v0);
1976 normal.normalize();
1977
1978 vec3 a = v1 - v0; // First edge vector
1979 vec3 b = v3 - v0; // Second edge vector
1980
1981 // Ray-plane intersection
1982 float denom = direction * normal;
1983 if (std::abs(denom) > 1e-8f) { // Not parallel to plane
1984 float t = (anchor - origin) * normal / denom;
1985
1986 if (t > 1e-8f && t < 1e8f) { // Valid intersection distance
1987 // Find intersection point
1988 vec3 p = origin + direction * t;
1989 vec3 d = p - anchor;
1990
1991 // Project onto patch coordinate system
1992 float ddota = d * a;
1993 float ddotb = d * b;
1994
1995 // Check if point is within patch bounds
1996 if (ddota >= 0.0f && ddota <= (a * a) && ddotb >= 0.0f && ddotb <= (b * b)) {
1997
1998 if (t < min_distance) {
1999 min_distance = t;
2000 hit = true;
2001 }
2002 }
2003 }
2004 }
2005 }
2006 } else if (type == PRIMITIVE_TYPE_VOXEL) {
2007 // Voxel (AABB) intersection using slab method
2008 if (vertices.size() == 8) {
2009 // Calculate AABB from 8 vertices
2010 vec3 aabb_min = vertices[0];
2011 vec3 aabb_max = vertices[0];
2012
2013 for (int i = 1; i < 8; i++) {
2014 aabb_min.x = std::min(aabb_min.x, vertices[i].x);
2015 aabb_min.y = std::min(aabb_min.y, vertices[i].y);
2016 aabb_min.z = std::min(aabb_min.z, vertices[i].z);
2017 aabb_max.x = std::max(aabb_max.x, vertices[i].x);
2018 aabb_max.y = std::max(aabb_max.y, vertices[i].y);
2019 aabb_max.z = std::max(aabb_max.z, vertices[i].z);
2020 }
2021
2022 // Ray-AABB intersection using slab method
2023 float t_near = -std::numeric_limits<float>::max();
2024 float t_far = std::numeric_limits<float>::max();
2025
2026 // Check intersection with each slab (X, Y, Z)
2027 for (int i = 0; i < 3; i++) {
2028 float ray_dir_component = (i == 0) ? direction.x : (i == 1) ? direction.y : direction.z;
2029 float ray_orig_component = (i == 0) ? origin.x : (i == 1) ? origin.y : origin.z;
2030 float aabb_min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
2031 float aabb_max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
2032
2033 if (std::abs(ray_dir_component) < 1e-8f) {
2034 // Ray is parallel to slab
2035 if (ray_orig_component < aabb_min_component || ray_orig_component > aabb_max_component) {
2036 return false; // Ray is outside slab and parallel - no intersection
2037 }
2038 } else {
2039 // Calculate intersection distances for this slab
2040 float t1 = (aabb_min_component - ray_orig_component) / ray_dir_component;
2041 float t2 = (aabb_max_component - ray_orig_component) / ray_dir_component;
2042
2043 // Ensure t1 <= t2
2044 if (t1 > t2) {
2045 std::swap(t1, t2);
2046 }
2047
2048 // Update near and far intersection distances
2049 t_near = std::max(t_near, t1);
2050 t_far = std::min(t_far, t2);
2051
2052 // Early exit if no intersection possible
2053 if (t_near > t_far) {
2054 return false;
2055 }
2056 }
2057 }
2058
2059 // Check if intersection is in front of ray origin
2060 if (t_far >= 0.0f && t_near < min_distance) {
2061 // Use t_near if it's positive (ray starts outside box), otherwise t_far (ray starts inside box)
2062 float intersection_distance = (t_near >= 1e-8f) ? t_near : t_far;
2063 if (intersection_distance >= 1e-8f) {
2064 min_distance = intersection_distance;
2065 hit = true;
2066 }
2067 }
2068 }
2069 }
2070
2071 if (hit) {
2072 distance = min_distance;
2073 return true;
2074 }
2075
2076 return false;
2077 } catch (const std::exception &e) {
2078 // Primitive no longer exists or can't be accessed
2079 return false;
2080 }
2081}
2082
2083void CollisionDetection::calculateGridIntersection(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions, const std::vector<uint> &UUIDs) {
2084 if (printmessages) {
2085 std::cerr << "WARNING: calculateGridIntersection not yet implemented" << std::endl;
2086 }
2087}
2088
2089std::vector<std::vector<std::vector<std::vector<uint>>>> CollisionDetection::getGridCells() {
2090 if (printmessages) {
2091 std::cerr << "WARNING: getGridCells not yet implemented" << std::endl;
2092 }
2093 return {};
2094}
2095
2096std::vector<uint> CollisionDetection::getGridIntersections(int i, int j, int k) {
2097 if (printmessages) {
2098 std::cerr << "WARNING: getGridIntersections not yet implemented" << std::endl;
2099 }
2100 return {};
2101}
2102
2103int CollisionDetection::optimizeLayout(const std::vector<uint> &UUIDs, float learning_rate, int max_iterations) {
2104 if (printmessages) {
2105 std::cerr << "WARNING: optimizeLayout not yet implemented" << std::endl;
2106 }
2107 return 0;
2108}
2109
2110int CollisionDetection::countRayIntersections(const vec3 &origin, const vec3 &direction, float max_distance) {
2111
2112 int intersection_count = 0;
2113
2114 if (bvh_nodes.empty()) {
2115 return intersection_count;
2116 }
2117
2118 // OPTIMIZATION: Minimum distance threshold to avoid self-intersection with nearby geometry
2119 // This prevents plant's own geometry (shoot tips, etc.) from occluding the entire cone view
2120 float min_distance = 0.05f; // 5cm minimum distance - ignore intersections closer than this
2121
2122 // Ensure the BVH is current before traversal
2123 const_cast<CollisionDetection *>(this)->ensureBVHCurrent();
2124
2125 // Stack-based traversal to avoid recursion
2126 std::vector<uint> node_stack;
2127 node_stack.push_back(0); // Start with root node
2128
2129 while (!node_stack.empty()) {
2130 uint node_idx = node_stack.back();
2131 node_stack.pop_back();
2132
2133 if (node_idx >= bvh_nodes.size())
2134 continue;
2135
2136 const BVHNode &node = bvh_nodes[node_idx];
2137
2138 // Test if ray intersects node AABB
2139 float t_min, t_max;
2140 if (!rayAABBIntersect(origin, direction, node.aabb_min, node.aabb_max, t_min, t_max)) {
2141 continue;
2142 }
2143
2144 // Check if intersection is within distance range (both min and max)
2145 if (t_max < min_distance) {
2146 continue; // Entire AABB is too close - skip
2147 }
2148 if (max_distance > 0.0f && t_min > max_distance) {
2149 continue; // Entire AABB is too far - skip
2150 }
2151
2152 if (node.is_leaf) {
2153 // Check each primitive in this leaf for ray intersection
2154 for (uint i = 0; i < node.primitive_count; i++) {
2155 uint primitive_id = primitive_indices[node.primitive_start + i];
2156
2157 // Get this primitive's AABB
2158 if (!context->doesPrimitiveExist(primitive_id)) {
2159 continue; // Skip invalid primitive
2160 }
2161 vec3 prim_min, prim_max;
2162 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
2163
2164 // Test ray against primitive AABB
2165 float prim_t_min, prim_t_max;
2166 if (rayAABBIntersect(origin, direction, prim_min, prim_max, prim_t_min, prim_t_max)) {
2167 // Check distance constraints (both min and max)
2168 bool within_min_distance = prim_t_min >= min_distance;
2169 bool within_max_distance = (max_distance <= 0.0f) || (prim_t_min <= max_distance);
2170
2171 if (within_min_distance && within_max_distance) {
2172 intersection_count++;
2173 }
2174 }
2175 }
2176 } else {
2177 // Add child nodes to stack for further traversal
2178 if (node.left_child != 0xFFFFFFFF) {
2179 node_stack.push_back(node.left_child);
2180 }
2181 if (node.right_child != 0xFFFFFFFF) {
2182 node_stack.push_back(node.right_child);
2183 }
2184 }
2185 }
2186
2187 return intersection_count;
2188}
2189
2190bool CollisionDetection::findNearestRayIntersection(const vec3 &origin, const vec3 &direction, const std::set<uint> &candidate_UUIDs, float &nearest_distance, float max_distance) {
2191
2192 nearest_distance = std::numeric_limits<float>::max();
2193 bool found_intersection = false;
2194
2195 // Check if we need to traverse both static and dynamic BVHs
2196 bool check_static_bvh = hierarchical_bvh_enabled && static_bvh_valid && !static_bvh_nodes.empty();
2197 bool check_dynamic_bvh = !bvh_nodes.empty();
2198
2199 if (!check_static_bvh && !check_dynamic_bvh) {
2200 return false;
2201 }
2202
2203
2204 // Ensure the BVH is current before traversal
2205 const_cast<CollisionDetection *>(this)->ensureBVHCurrent();
2206
2207 // Lambda function to traverse a BVH and find ray intersections
2208 auto traverseBVH = [&](const std::vector<BVHNode> &nodes, const std::vector<uint> &primitives, const char *bvh_name) {
2209 if (nodes.empty())
2210 return;
2211
2212 // Stack-based traversal to avoid recursion
2213 std::vector<uint> node_stack;
2214 node_stack.push_back(0); // Start with root node
2215
2216 while (!node_stack.empty()) {
2217 uint node_idx = node_stack.back();
2218 node_stack.pop_back();
2219
2220 if (node_idx >= nodes.size()) {
2221 continue;
2222 }
2223
2224 const BVHNode &node = nodes[node_idx];
2225
2226 // Test if ray intersects node AABB
2227 float t_min, t_max;
2228 if (!rayAABBIntersect(origin, direction, node.aabb_min, node.aabb_max, t_min, t_max)) {
2229 continue;
2230 }
2231
2232 // Check if intersection is within distance range
2233 if (max_distance > 0.0f && t_min > max_distance) {
2234 continue; // Entire AABB is too far - skip
2235 }
2236
2237 // If we've already found a closer intersection than this AABB, skip it
2238 if (t_min > nearest_distance) {
2239 continue;
2240 }
2241
2242 if (node.is_leaf) {
2243 // Check each primitive in this leaf for ray intersection
2244 for (uint i = 0; i < node.primitive_count; i++) {
2245 uint primitive_id = primitives[node.primitive_start + i];
2246
2247
2248 // Skip if this primitive is not in the candidate set (unless candidate set is empty)
2249 if (!candidate_UUIDs.empty() && candidate_UUIDs.find(primitive_id) == candidate_UUIDs.end()) {
2250 continue;
2251 }
2252
2253
2254 // Get this primitive's AABB
2255 if (!context->doesPrimitiveExist(primitive_id)) {
2256 continue; // Skip invalid primitive
2257 }
2258
2259 vec3 prim_min, prim_max;
2260 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
2261
2262 // Test ray against primitive AABB
2263 float prim_t_min, prim_t_max;
2264 if (rayAABBIntersect(origin, direction, prim_min, prim_max, prim_t_min, prim_t_max)) {
2265 // Check distance constraints
2266 bool within_max_distance = (max_distance <= 0.0f) || (prim_t_min <= max_distance);
2267
2268 if (within_max_distance && prim_t_min > 0.0f && prim_t_min < nearest_distance) {
2269 // For now, we use AABB intersection distance as an approximation
2270 // A more accurate implementation would perform exact ray-primitive intersection
2271 nearest_distance = prim_t_min;
2272 found_intersection = true;
2273 }
2274 }
2275 }
2276 } else {
2277 // Add child nodes to stack for further traversal
2278 if (node.left_child != 0xFFFFFFFF) {
2279 node_stack.push_back(node.left_child);
2280 }
2281 if (node.right_child != 0xFFFFFFFF) {
2282 node_stack.push_back(node.right_child);
2283 }
2284 }
2285 } // End of while loop
2286 }; // End of lambda
2287
2288 // First, traverse the static BVH if hierarchical BVH is enabled
2289 if (check_static_bvh) {
2290 traverseBVH(static_bvh_nodes, static_bvh_primitives, "static");
2291 }
2292
2293 // Then, traverse the dynamic BVH
2294 if (check_dynamic_bvh) {
2295 traverseBVH(bvh_nodes, primitive_indices, "dynamic");
2296 }
2297
2298 return found_intersection;
2299}
2300
2301bool CollisionDetection::findNearestPrimitiveDistance(const vec3 &origin, const vec3 &direction, const std::vector<uint> &candidate_UUIDs, float &distance, vec3 &obstacle_direction) {
2302
2303 if (candidate_UUIDs.empty()) {
2304 if (printmessages) {
2305 std::cerr << "WARNING (CollisionDetection::findNearestPrimitiveDistance): No candidate UUIDs provided" << std::endl;
2306 }
2307 return false;
2308 }
2309
2310 // Validate that direction is normalized
2311 float dir_magnitude = direction.magnitude();
2312 if (std::abs(dir_magnitude - 1.0f) > 1e-6f) {
2313 if (printmessages) {
2314 std::cerr << "WARNING (CollisionDetection::findNearestPrimitiveDistance): Direction vector is not normalized (magnitude = " << dir_magnitude << ")" << std::endl;
2315 }
2316 return false;
2317 }
2318
2319
2320 // Filter out invalid UUIDs
2321 std::vector<uint> valid_candidates;
2322 for (uint uuid: candidate_UUIDs) {
2323 if (context->doesPrimitiveExist(uuid)) {
2324 valid_candidates.push_back(uuid);
2325 } else if (printmessages) {
2326 std::cerr << "WARNING (CollisionDetection::findNearestPrimitiveDistance): Skipping invalid UUID " << uuid << std::endl;
2327 }
2328 }
2329
2330
2331 if (valid_candidates.empty()) {
2332 if (printmessages) {
2333 std::cerr << "WARNING (CollisionDetection::findNearestPrimitiveDistance): No valid candidate UUIDs after filtering" << std::endl;
2334 }
2335 return false;
2336 }
2337
2338 float nearest_distance_found = std::numeric_limits<float>::max();
2339 vec3 nearest_obstacle_direction;
2340 bool found_forward_surface = false;
2341
2342 // Check each candidate primitive to find the nearest "forward-facing" surface
2343 for (uint primitive_id: valid_candidates) {
2344 // Get primitive normal and a point on the surface using Context methods
2345 vec3 surface_normal = context->getPrimitiveNormal(primitive_id);
2346 std::vector<vec3> vertices = context->getPrimitiveVertices(primitive_id);
2347
2348 if (vertices.empty()) {
2349 continue; // Skip if no vertices
2350 }
2351
2352 // Use first vertex as a point on the plane
2353 vec3 point_on_plane = vertices[0];
2354
2355 // Calculate distance from origin to the plane
2356 vec3 to_origin = origin - point_on_plane;
2357 float distance_to_plane = to_origin * surface_normal;
2358
2359 // Distance is the absolute value
2360 float surface_distance = std::abs(distance_to_plane);
2361
2362 // The direction from origin to closest point on surface
2363 vec3 surface_direction;
2364 if (distance_to_plane > 0) {
2365 // Origin is on the positive side of the normal - direction to surface is -normal
2366 surface_direction = -surface_normal;
2367 } else {
2368 // Origin is on the negative side of the normal - direction to surface is +normal
2369 surface_direction = surface_normal;
2370 }
2371
2372 // Check if this surface is "in front" using dot product
2373 float dot_product = surface_direction * direction;
2374
2375 if (dot_product > 0.0f) { // Surface is in front of origin
2376 if (surface_distance < nearest_distance_found) {
2377 nearest_distance_found = surface_distance;
2378 nearest_obstacle_direction = surface_direction;
2379 found_forward_surface = true;
2380 }
2381 }
2382 }
2383
2384 if (found_forward_surface) {
2385 distance = nearest_distance_found;
2386 obstacle_direction = nearest_obstacle_direction;
2387 return true;
2388 }
2389
2390
2391 return false;
2392}
2393
2394bool CollisionDetection::findNearestSolidObstacleInCone(const vec3 &apex, const vec3 &axis, float half_angle, float height, const std::vector<uint> &candidate_UUIDs, float &distance, vec3 &obstacle_direction, int num_rays) {
2395
2396 // OPTIMIZATION: Use per-tree BVH if enabled for better scaling
2397 std::vector<uint> effective_candidates;
2398 if (tree_based_bvh_enabled) {
2399 // Get tree-relevant geometry - use proportional distance for spatial filtering
2400 float spatial_filter_distance = height * 1.25f; // 25% buffer beyond cone height
2401 effective_candidates = getRelevantGeometryForTree(apex, candidate_UUIDs, spatial_filter_distance);
2402
2403 } else {
2404 effective_candidates = candidate_UUIDs;
2405 }
2406
2407 if (effective_candidates.empty()) {
2408 return false; // No obstacles within detection range - normal for outer branches
2409 }
2410
2411 // Validate input parameters
2412 if (half_angle <= 0.0f || half_angle > M_PI / 2.0f) {
2413 if (printmessages) {
2414 std::cerr << "WARNING (CollisionDetection::findNearestSolidObstacleInCone): Invalid half_angle " << half_angle << std::endl;
2415 }
2416 return false;
2417 }
2418
2419 if (height <= 0.0f) {
2420 if (printmessages) {
2421 std::cerr << "WARNING (CollisionDetection::findNearestSolidObstacleInCone): Invalid height " << height << std::endl;
2422 }
2423 return false;
2424 }
2425
2426 // Ensure BVH is current
2427 ensureBVHCurrent();
2428
2429 // Check if BVH is empty
2430 if (bvh_nodes.empty()) {
2431 return false; // No geometry to collide with
2432 }
2433
2434 // Convert effective candidate UUIDs to set for efficient lookup
2435 std::set<uint> candidate_set(effective_candidates.begin(), effective_candidates.end());
2436
2437 // Generate ray directions within the cone
2438 std::vector<vec3> ray_directions = sampleDirectionsInCone(apex, axis, half_angle, num_rays);
2439
2440 float nearest_distance = std::numeric_limits<float>::max();
2441 vec3 nearest_direction;
2442 bool found_obstacle = false;
2443
2444 // Use modern batch ray-casting for better performance
2445 std::vector<RayQuery> ray_queries;
2446 ray_queries.reserve(ray_directions.size());
2447
2448 for (const vec3 &ray_dir: ray_directions) {
2449 ray_queries.emplace_back(apex, ray_dir, height, candidate_UUIDs);
2450 }
2451
2452 // Cast all rays in batch - automatically selects CPU/GPU based on count
2453 RayTracingStats ray_stats;
2454 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2455
2456 // Find the nearest obstacle from all ray results
2457 for (size_t i = 0; i < hit_results.size(); ++i) {
2458 const HitResult &result = hit_results[i];
2459
2460 if (result.hit && result.distance < nearest_distance) {
2461 nearest_distance = result.distance;
2462 nearest_direction = ray_directions[i];
2463 found_obstacle = true;
2464 }
2465 }
2466
2467 if (found_obstacle) {
2468 distance = nearest_distance;
2469 obstacle_direction = nearest_direction;
2470 return true;
2471 }
2472
2473 return false;
2474}
2475
2476bool CollisionDetection::findNearestSolidObstacleInCone(const vec3 &apex, const vec3 &axis, float half_angle, float height, const std::vector<uint> &candidate_UUIDs, const std::vector<uint> &plant_primitives, float &distance,
2477 vec3 &obstacle_direction, int num_rays) {
2478
2479 // OPTIMIZATION: Use per-tree BVH with plant primitive identification for better scaling
2480 std::vector<uint> effective_candidates;
2481 if (tree_based_bvh_enabled) {
2482 // Get tree-relevant geometry using plant primitives to identify the querying tree
2483 effective_candidates = getRelevantGeometryForTree(apex, plant_primitives, height);
2484
2485 if (printmessages && !effective_candidates.empty()) {
2486 std::cout << "Per-tree findNearestSolidObstacleInCone: Using " << effective_candidates.size() << " relevant targets instead of " << candidate_UUIDs.size() << " total targets" << std::endl;
2487 }
2488 } else {
2489 effective_candidates = candidate_UUIDs;
2490 }
2491
2492 if (effective_candidates.empty()) {
2493 return false; // No obstacles within detection range - normal for outer branches
2494 }
2495
2496 // Validate input parameters
2497 if (half_angle <= 0.0f || half_angle > M_PI / 2.0f) {
2498 if (printmessages) {
2499 std::cerr << "WARNING (CollisionDetection::findNearestSolidObstacleInCone): Invalid half_angle " << half_angle << std::endl;
2500 }
2501 return false;
2502 }
2503
2504 if (height <= 0.0f) {
2505 if (printmessages) {
2506 std::cerr << "WARNING (CollisionDetection::findNearestSolidObstacleInCone): Invalid height " << height << std::endl;
2507 }
2508 return false;
2509 }
2510
2511 // Ensure BVH is current
2512 ensureBVHCurrent();
2513
2514 // Check if BVH is empty
2515 if (bvh_nodes.empty()) {
2516 return false; // No geometry to collide with
2517 }
2518
2519 // Convert effective candidate UUIDs to set for efficient lookup
2520 std::set<uint> candidate_set(effective_candidates.begin(), effective_candidates.end());
2521
2522 // Generate ray directions within the cone
2523 std::vector<vec3> ray_directions = sampleDirectionsInCone(apex, axis, half_angle, num_rays);
2524
2525 float nearest_distance = std::numeric_limits<float>::max();
2526 vec3 nearest_direction;
2527 bool found_obstacle = false;
2528
2529 // Use modern batch ray-casting for better performance
2530 std::vector<RayQuery> ray_queries;
2531 ray_queries.reserve(ray_directions.size());
2532
2533 for (const vec3 &ray_dir: ray_directions) {
2534 ray_queries.emplace_back(apex, ray_dir, height, effective_candidates);
2535 }
2536
2537 // Cast all rays in batch - automatically selects CPU/GPU based on count
2538 RayTracingStats ray_stats;
2539 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2540
2541 // Find the nearest obstacle from all ray results
2542 for (size_t i = 0; i < hit_results.size(); ++i) {
2543 const HitResult &result = hit_results[i];
2544
2545 if (result.hit && result.distance < nearest_distance) {
2546 nearest_distance = result.distance;
2547 nearest_direction = ray_directions[i];
2548 found_obstacle = true;
2549 }
2550 }
2551
2552 if (found_obstacle) {
2553 distance = nearest_distance;
2554 obstacle_direction = nearest_direction;
2555 return true;
2556 }
2557
2558 return false;
2559}
2560
2561
2562std::vector<helios::vec3> CollisionDetection::sampleDirectionsInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, int num_samples) {
2563
2564 std::vector<vec3> directions;
2565 directions.reserve(num_samples);
2566
2567 if (num_samples <= 0 || half_angle <= 0.0f) {
2568 return directions;
2569 }
2570
2571 // Normalize the central axis
2572 vec3 axis = central_axis;
2573 axis.normalize();
2574
2575 // Create an orthonormal basis with the central axis as the primary axis
2576 vec3 u, v;
2577 if (std::abs(axis.z) < 0.9f) {
2578 u = cross(axis, make_vec3(0, 0, 1));
2579 } else {
2580 u = cross(axis, make_vec3(1, 0, 0));
2581 }
2582 u.normalize();
2583 v = cross(axis, u);
2584 v.normalize();
2585
2586 // Generate uniform samples within the cone using rejection sampling on hemisphere
2587 std::random_device rd;
2588 std::mt19937 gen(rd());
2589 std::uniform_real_distribution<float> uniform_dist(0.0f, 1.0f);
2590
2591 int samples_generated = 0;
2592 int max_attempts = num_samples * 10; // Limit attempts to prevent infinite loops
2593 int attempts = 0;
2594
2595 while (samples_generated < num_samples && attempts < max_attempts) {
2596 attempts++;
2597
2598 // Generate uniform sample on unit hemisphere using spherical coordinates
2599 float u1 = uniform_dist(gen);
2600 float u2 = uniform_dist(gen);
2601
2602 // Use stratified sampling for better distribution
2603 if (samples_generated > 0) {
2604 float stratum_u1 = (float) samples_generated / (float) num_samples;
2605 float stratum_u2 = uniform_dist(gen);
2606 u1 = (stratum_u1 + u1 / (float) num_samples);
2607 if (u1 > 1.0f)
2608 u1 -= 1.0f;
2609 }
2610
2611 // Convert to spherical coordinates
2612 // For uniform sampling within cone, we need:
2613 // cos(theta) uniformly distributed between cos(half_angle) and 1
2614 float cos_half_angle = cosf(half_angle);
2615 float cos_theta = cos_half_angle + u1 * (1.0f - cos_half_angle);
2616 float sin_theta = sqrtf(1.0f - cos_theta * cos_theta);
2617 float phi = 2.0f * M_PI * u2;
2618
2619 // Convert to Cartesian coordinates in local coordinate system
2620 float x = sin_theta * cosf(phi);
2621 float y = sin_theta * sinf(phi);
2622 float z = cos_theta;
2623
2624 // Transform from local coordinates to world coordinates
2625 vec3 local_direction = make_vec3(x, y, z);
2626 vec3 world_direction = u * local_direction.x + v * local_direction.y + axis * local_direction.z;
2627 world_direction.normalize();
2628
2629 // Verify the direction is within the cone (numerical precision check)
2630 float dot_product = world_direction * axis;
2631 if (dot_product >= cos_half_angle - 1e-6f) {
2632 directions.push_back(world_direction);
2633 samples_generated++;
2634 }
2635 }
2636
2637 // If we couldn't generate enough samples, fill with the central axis
2638 while (directions.size() < (size_t) num_samples) {
2639 directions.push_back(axis);
2640 }
2641
2642 return directions;
2643}
2644
2645
2646// -------- SPATIAL GRID OPTIMIZATION --------
2647
2648std::vector<uint> CollisionDetection::getCandidatesUsingSpatialGrid(const Cone &cone, const vec3 &apex, const vec3 &central_axis, float half_angle, float height) {
2649 // Get cone AABB for grid cell determination
2650 vec3 cone_base = apex + central_axis * height;
2651 float cone_base_radius = height * tan(half_angle);
2652
2653 vec3 cone_aabb_min = vec3(std::min(apex.x, cone_base.x - cone_base_radius), std::min(apex.y, cone_base.y - cone_base_radius), std::min(apex.z, cone_base.z - cone_base_radius));
2654 vec3 cone_aabb_max = vec3(std::max(apex.x, cone_base.x + cone_base_radius), std::max(apex.y, cone_base.y + cone_base_radius), std::max(apex.z, cone_base.z + cone_base_radius));
2655
2656 // Use BVH primitive indices approach for better performance
2657 std::vector<uint> all_primitives;
2658 if (!primitive_indices.empty()) {
2659 all_primitives = primitive_indices; // Use BVH primitive indices
2660 } else {
2661 // Fallback: get all primitives from context (slower)
2662 all_primitives = context->getAllUUIDs();
2663 }
2664
2665 // Use existing filterPrimitivesParallel method with cone object
2666 return filterPrimitivesParallel(cone, all_primitives);
2667}
2668
2669// -------- HYBRID BVH + RASTERIZATION METHODS --------
2670
2671std::vector<uint> CollisionDetection::getCandidatePrimitivesInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, float height) {
2672 std::vector<uint> candidates;
2673
2674 // Create cone object for filtering
2675 Cone cone{apex, central_axis, half_angle, height};
2676
2677 // OPTIMIZATION: Use spatial grid to avoid O(N) scaling with tree count
2678 candidates = getCandidatesUsingSpatialGrid(cone, apex, central_axis, half_angle, height);
2679
2680 return candidates;
2681}
2682
2683
2684// -------- GAP DETECTION IMPLEMENTATION --------
2685
2686std::vector<CollisionDetection::Gap> CollisionDetection::detectGapsInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, float height, int num_samples) {
2687
2688 std::vector<Gap> gaps;
2689
2690 // Generate dense ray samples within the cone
2691 std::vector<vec3> sample_directions = sampleDirectionsInCone(apex, central_axis, half_angle, num_samples);
2692
2693 if (sample_directions.empty()) {
2694 return gaps;
2695 }
2696
2697 // Build ray sample data using modern ray-tracing API
2698 std::vector<RaySample> ray_samples;
2699 ray_samples.reserve(sample_directions.size());
2700
2701 float max_distance = (height > 0.0f) ? height : -1.0f;
2702
2703 // Use batch ray casting for better performance
2704 std::vector<RayQuery> ray_queries;
2705 ray_queries.reserve(sample_directions.size());
2706
2707 for (const vec3 &direction: sample_directions) {
2708 ray_queries.emplace_back(apex, direction, max_distance);
2709 }
2710
2711 // Cast all rays in batch - automatically selects CPU/GPU based on count
2712 RayTracingStats ray_stats;
2713 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2714
2715 // Process results to build ray samples
2716 for (size_t i = 0; i < hit_results.size(); ++i) {
2717 RaySample sample;
2718 sample.direction = sample_directions[i];
2719
2720 if (hit_results[i].hit) {
2721 sample.distance = hit_results[i].distance;
2722 sample.is_free = false;
2723 } else {
2724 sample.distance = (max_distance > 0.0f) ? max_distance : 1000.0f;
2725 sample.is_free = true;
2726 }
2727
2728 ray_samples.push_back(sample);
2729 }
2730
2731
2732 // Use a more sophisticated gap detection approach based on contiguous free regions
2733 // First, sort samples by angular position relative to central axis to identify contiguous regions
2734 std::vector<std::pair<float, size_t>> angular_positions;
2735 for (size_t i = 0; i < ray_samples.size(); ++i) {
2736 if (ray_samples[i].is_free) {
2737 // Calculate angular position in cone-relative coordinates
2738 float dot_product = ray_samples[i].direction * central_axis;
2739 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2740 float angular_from_center = acosf(dot_product);
2741 angular_positions.push_back({angular_from_center, i});
2742 }
2743 }
2744
2745 if (angular_positions.empty()) {
2746 return gaps; // No free samples
2747 }
2748
2749 // Sort by angular position
2750 std::sort(angular_positions.begin(), angular_positions.end());
2751
2752 // Find contiguous free regions (gaps) with minimum size threshold
2753 std::vector<bool> processed(ray_samples.size(), false);
2754 float min_gap_angular_size = half_angle * 0.05f; // 5% of cone angle minimum gap size
2755
2756 for (size_t start = 0; start < angular_positions.size(); ++start) {
2757 size_t start_idx = angular_positions[start].second;
2758 if (processed[start_idx])
2759 continue;
2760
2761 Gap new_gap;
2762 new_gap.sample_indices.push_back(start_idx);
2763 processed[start_idx] = true;
2764
2765 // Extend gap by finding nearby free samples using k-nearest neighbor approach
2766 std::vector<float> distances_to_start;
2767 for (size_t j = 0; j < ray_samples.size(); ++j) {
2768 if (j != start_idx && ray_samples[j].is_free && !processed[j]) {
2769 float dot_product = ray_samples[start_idx].direction * ray_samples[j].direction;
2770 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2771 float angular_distance = acosf(dot_product);
2772 distances_to_start.push_back(angular_distance);
2773 } else {
2774 distances_to_start.push_back(999.0f); // Large value for excluded samples
2775 }
2776 }
2777
2778 // Add nearby samples to gap using adaptive threshold
2779 float sample_density = 2.0f * half_angle / sqrtf((float) num_samples);
2780 float adaptive_threshold = sample_density * 3.0f; // 3x sample spacing for connection
2781
2782 for (size_t j = 0; j < ray_samples.size(); ++j) {
2783 if (j != start_idx && ray_samples[j].is_free && !processed[j] && distances_to_start[j] < adaptive_threshold) {
2784 new_gap.sample_indices.push_back(j);
2785 processed[j] = true;
2786 }
2787 }
2788
2789 // Only keep gaps that meet minimum size requirements
2790 if (new_gap.sample_indices.size() >= 5) { // Require at least 5 samples for a valid gap
2791 gaps.push_back(new_gap);
2792 }
2793 }
2794
2795 // Calculate gap properties
2796 for (Gap &gap: gaps) {
2797 // Calculate gap center direction (average of constituent directions)
2798 vec3 center(0, 0, 0);
2799 for (int idx: gap.sample_indices) {
2800 center = center + ray_samples[idx].direction;
2801 }
2802 center = center / (float) gap.sample_indices.size();
2803 center.normalize();
2804 gap.center_direction = center;
2805
2806 // Calculate angular size
2807 std::vector<RaySample> gap_samples;
2808 for (int idx: gap.sample_indices) {
2809 gap_samples.push_back(ray_samples[idx]);
2810 }
2811 gap.angular_size = calculateGapAngularSize(gap_samples, central_axis);
2812
2813 // Calculate angular distance from central axis
2814 float dot_product = gap.center_direction * central_axis;
2815 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2816 gap.angular_distance = acosf(dot_product);
2817 }
2818
2819 // OPTIMIZATION: Spatial filtering - remove gaps that are too far from central axis
2820 // This reduces the number of gaps passed to the expensive scoring function
2821 if (gaps.size() > 10) {
2822 float max_angular_distance = half_angle * 0.8f; // Only consider gaps within 80% of cone angle
2823
2824 auto it = std::remove_if(gaps.begin(), gaps.end(), [max_angular_distance](const Gap &gap) { return gap.angular_distance > max_angular_distance; });
2825
2826 gaps.erase(it, gaps.end());
2827
2828 // If filtering removed too many gaps, keep at least the closest ones
2829 if (gaps.size() < 3 && gaps.size() > 0) {
2830 // Sort by angular distance and keep the closest ones
2831 std::partial_sort(gaps.begin(), gaps.begin() + std::min(size_t(3), gaps.size()), gaps.end(), [](const Gap &a, const Gap &b) { return a.angular_distance < b.angular_distance; });
2832 }
2833 }
2834
2835 return gaps;
2836}
2837
2838
2839float CollisionDetection::calculateGapAngularSize(const std::vector<RaySample> &gap_samples, const vec3 &central_axis) {
2840
2841 if (gap_samples.empty()) {
2842 return 0.0f;
2843 }
2844
2845 // Find the angular extent of the gap by finding min/max angles
2846 float min_angle = M_PI;
2847 float max_angle = 0.0f;
2848
2849 for (const RaySample &sample: gap_samples) {
2850 float dot_product = sample.direction * central_axis;
2851 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2852 float angle = acosf(dot_product);
2853
2854 min_angle = std::min(min_angle, angle);
2855 max_angle = std::max(max_angle, angle);
2856 }
2857
2858 // Simple approximation: angular size as solid angle
2859 float angular_width = max_angle - min_angle;
2860
2861 // Convert to approximate solid angle (steradians)
2862 // This is a rough approximation: solid_angle ≈ π * (angular_width)^2
2863 float solid_angle = M_PI * angular_width * angular_width;
2864
2865 return solid_angle;
2866}
2867
2868void CollisionDetection::scoreGapsByFishEyeMetric(std::vector<Gap> &gaps, const vec3 &central_axis) {
2869
2870 // Early exit for small gap counts - full sort is fine
2871 if (gaps.size() <= 10) {
2872 for (Gap &gap: gaps) {
2873 // Fish-eye metric: prefer larger gaps closer to central axis
2874 // Gap size component (logarithmic scaling for larger gaps)
2875 float size_score = log(1.0f + gap.angular_size * 100.0f); // Scale up angular size
2876 // Distance penalty (exponential penalty for gaps far from center)
2877 float distance_penalty = exp(gap.angular_distance * 2.0f);
2878 // Combined score (higher is better)
2879 gap.score = size_score / distance_penalty;
2880 }
2881 // Sort gaps by score (highest first)
2882 std::sort(gaps.begin(), gaps.end(), [](const Gap &a, const Gap &b) { return a.score > b.score; });
2883 return;
2884 }
2885
2886 // OPTIMIZATION: For large gap counts, use partial sorting
2887 // We only need the top 3-5 gaps for collision avoidance
2888 const size_t max_gaps_needed = std::min(size_t(5), gaps.size());
2889
2890 // Calculate scores for all gaps
2891 for (Gap &gap: gaps) {
2892 // Fish-eye metric: prefer larger gaps closer to central axis
2893
2894 // Gap size component (logarithmic scaling for larger gaps)
2895 float size_score = log(1.0f + gap.angular_size * 100.0f); // Scale up angular size
2896
2897 // Distance penalty (exponential penalty for gaps far from center)
2898 float distance_penalty = exp(gap.angular_distance * 2.0f);
2899
2900 // Combined score (higher is better)
2901 gap.score = size_score / distance_penalty;
2902 }
2903
2904 // Use partial_sort to get only the top N gaps - O(n log k) instead of O(n log n)
2905 std::partial_sort(gaps.begin(), gaps.begin() + max_gaps_needed, gaps.end(), [](const Gap &a, const Gap &b) { return a.score > b.score; });
2906
2907 // Resize to keep only the top gaps to avoid processing unnecessary gaps later
2908 gaps.resize(max_gaps_needed);
2909}
2910
2911helios::vec3 CollisionDetection::findOptimalGapDirection(const std::vector<Gap> &gaps, const vec3 &central_axis) {
2912
2913 if (gaps.empty()) {
2914 // No gaps found, return central axis
2915 vec3 result = central_axis;
2916 result.normalize();
2917 return result;
2918 }
2919
2920 // Return direction toward the highest-scoring gap
2921 const Gap &best_gap = gaps[0];
2922 return best_gap.center_direction;
2923}
2924
2925// -------- SPATIAL OPTIMIZATION METHODS --------
2926
2927std::vector<std::pair<uint, uint>> CollisionDetection::findCollisionsWithinDistance(const std::vector<uint> &query_UUIDs, const std::vector<uint> &target_UUIDs, float max_distance) {
2928
2929 std::vector<std::pair<uint, uint>> collision_pairs;
2930
2931
2932 // Update primitive centroids cache for target geometry
2933 for (uint target_id: target_UUIDs) {
2934 if (primitive_centroids_cache.find(target_id) == primitive_centroids_cache.end()) {
2935 // Calculate and cache centroid for this primitive
2936 std::vector<vec3> vertices = context->getPrimitiveVertices(target_id);
2937 if (!vertices.empty()) {
2938 vec3 centroid = make_vec3(0, 0, 0);
2939 for (const vec3 &vertex: vertices) {
2940 centroid = centroid + vertex;
2941 }
2942 centroid = centroid / float(vertices.size());
2943 primitive_centroids_cache[target_id] = centroid;
2944 }
2945 }
2946 }
2947
2948 // For each query primitive, find nearby targets within distance
2949 for (uint query_id: query_UUIDs) {
2950 // Get query centroid
2951 std::vector<vec3> query_vertices = context->getPrimitiveVertices(query_id);
2952 if (query_vertices.empty())
2953 continue;
2954
2955 vec3 query_centroid = make_vec3(0, 0, 0);
2956 for (const vec3 &vertex: query_vertices) {
2957 query_centroid = query_centroid + vertex;
2958 }
2959 query_centroid = query_centroid / float(query_vertices.size());
2960
2961 // Check distance to each target
2962 for (uint target_id: target_UUIDs) {
2963 if (query_id == target_id)
2964 continue; // Skip self-collision
2965
2966 auto target_centroid_it = primitive_centroids_cache.find(target_id);
2967 if (target_centroid_it != primitive_centroids_cache.end()) {
2968 vec3 target_centroid = target_centroid_it->second;
2969 float distance = (query_centroid - target_centroid).magnitude();
2970
2971 if (distance <= max_distance) {
2972 // Within distance threshold, check for actual collision
2973 std::vector<uint> single_query = {query_id};
2974 std::vector<uint> single_target = {target_id};
2975 std::vector<uint> empty_objects;
2976
2977 std::vector<uint> collisions = findCollisions(single_query, empty_objects, single_target, empty_objects);
2978 if (!collisions.empty()) {
2979 collision_pairs.push_back(std::make_pair(query_id, target_id));
2980 }
2981 }
2982 }
2983 }
2984 }
2985
2986 return collision_pairs;
2987}
2988
2990 if (distance <= 0.0f) {
2991 helios_runtime_error("ERROR (CollisionDetection::setMaxCollisionDistance): Distance must be positive");
2992 }
2993
2994 max_collision_distance = distance;
2995}
2996
2998 return max_collision_distance;
2999}
3000
3001std::vector<uint> CollisionDetection::filterGeometryByDistance(const helios::vec3 &query_center, float max_radius, const std::vector<uint> &candidate_UUIDs) {
3002
3003 std::vector<uint> filtered_UUIDs;
3004
3005 // Get list of candidates (either provided or all primitives)
3006 std::vector<uint> candidates;
3007 if (candidate_UUIDs.empty()) {
3008 candidates = context->getAllUUIDs();
3009 } else {
3010 candidates = candidate_UUIDs;
3011 }
3012
3013 // Filter candidates by distance
3014 for (uint candidate_id: candidates) {
3015 // Skip if primitive doesn't exist
3016 if (!context->doesPrimitiveExist(candidate_id)) {
3017 continue;
3018 }
3019
3020 // Get primitive centroid (calculate if not cached)
3021 vec3 centroid;
3022 auto cache_it = primitive_centroids_cache.find(candidate_id);
3023 if (cache_it != primitive_centroids_cache.end()) {
3024 centroid = cache_it->second;
3025 } else {
3026 // Calculate and cache centroid
3027 std::vector<vec3> vertices = context->getPrimitiveVertices(candidate_id);
3028 if (vertices.empty())
3029 continue;
3030
3031 centroid = make_vec3(0, 0, 0);
3032 for (const vec3 &vertex: vertices) {
3033 centroid = centroid + vertex;
3034 }
3035 centroid = centroid / float(vertices.size());
3036 primitive_centroids_cache[candidate_id] = centroid;
3037 }
3038
3039 // Check distance from query center
3040 float distance = (query_center - centroid).magnitude();
3041 if (distance <= max_radius) {
3042 filtered_UUIDs.push_back(candidate_id);
3043 }
3044 }
3045
3046 return filtered_UUIDs;
3047}
3048
3049// -------- VOXEL RAY PATH LENGTH CALCULATIONS --------
3050
3051void CollisionDetection::calculateVoxelRayPathLengths(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions, const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3052
3053 if (ray_origins.size() != ray_directions.size()) {
3054 helios_runtime_error("ERROR (CollisionDetection::calculateVoxelRayPathLengths): ray_origins and ray_directions vectors must have same size");
3055 }
3056
3057 if (ray_origins.empty()) {
3058 if (printmessages) {
3059 std::cerr << "WARNING (CollisionDetection::calculateVoxelRayPathLengths): No rays provided" << std::endl;
3060 }
3061 return;
3062 }
3063
3064 // Initialize voxel data structures for the given grid
3065 initializeVoxelData(grid_center, grid_size, grid_divisions);
3066
3067 // Ensure BVH and primitive cache are built before parallel section
3068 // This prevents thread-safety issues when multiple threads try to build them
3069 ensureBVHCurrent();
3070 if (primitive_cache.empty()) {
3071 buildPrimitiveCache();
3072 }
3073
3074 // Choose GPU or CPU implementation based on acceleration setting
3075#ifdef HELIOS_CUDA_AVAILABLE
3077 calculateVoxelRayPathLengths_GPU(ray_origins, ray_directions);
3078 } else {
3079 calculateVoxelRayPathLengths_CPU(ray_origins, ray_directions);
3080 }
3081#else
3082 calculateVoxelRayPathLengths_CPU(ray_origins, ray_directions);
3083#endif
3084}
3085
3086void CollisionDetection::setVoxelTransmissionProbability(int P_denom, int P_trans, const helios::int3 &ijk) {
3087 if (!validateVoxelIndices(ijk)) {
3088 helios_runtime_error("ERROR (CollisionDetection::setVoxelTransmissionProbability): Invalid voxel indices");
3089 }
3090
3091 if (!voxel_data_initialized) {
3092 helios_runtime_error("ERROR (CollisionDetection::setVoxelTransmissionProbability): Voxel data not initialized. Call calculateVoxelRayPathLengths first.");
3093 }
3094
3095 if (use_flat_arrays) {
3096 size_t flat_idx = flatIndex(ijk);
3097 voxel_ray_counts_flat[flat_idx] = P_denom;
3098 voxel_transmitted_flat[flat_idx] = P_trans;
3099 } else {
3100 voxel_ray_counts[ijk.x][ijk.y][ijk.z] = P_denom;
3101 voxel_transmitted[ijk.x][ijk.y][ijk.z] = P_trans;
3102 }
3103}
3104
3105void CollisionDetection::getVoxelTransmissionProbability(const helios::int3 &ijk, int &P_denom, int &P_trans) const {
3106 if (!validateVoxelIndices(ijk)) {
3107 helios_runtime_error("ERROR (CollisionDetection::getVoxelTransmissionProbability): Invalid voxel indices");
3108 }
3109
3110 if (!voxel_data_initialized) {
3111 P_denom = 0;
3112 P_trans = 0;
3113 return;
3114 }
3115
3116 if (use_flat_arrays) {
3117 size_t flat_idx = flatIndex(ijk);
3118 P_denom = voxel_ray_counts_flat[flat_idx];
3119 P_trans = voxel_transmitted_flat[flat_idx];
3120 } else {
3121 P_denom = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3122 P_trans = voxel_transmitted[ijk.x][ijk.y][ijk.z];
3123 }
3124}
3125
3126void CollisionDetection::setVoxelRbar(float r_bar, const helios::int3 &ijk) {
3127 if (!validateVoxelIndices(ijk)) {
3128 helios_runtime_error("ERROR (CollisionDetection::setVoxelRbar): Invalid voxel indices");
3129 }
3130
3131 if (!voxel_data_initialized) {
3132 helios_runtime_error("ERROR (CollisionDetection::setVoxelRbar): Voxel data not initialized. Call calculateVoxelRayPathLengths first.");
3133 }
3134
3135 if (use_flat_arrays) {
3136 size_t flat_idx = flatIndex(ijk);
3137 // Store r_bar * ray_count so that getVoxelRbar returns the correct value when it divides
3138 int ray_count = voxel_ray_counts_flat[flat_idx];
3139 if (ray_count == 0) {
3140 ray_count = 1; // Set to 1 to avoid division by zero
3141 voxel_ray_counts_flat[flat_idx] = 1;
3142 }
3143 voxel_path_lengths_flat[flat_idx] = r_bar * static_cast<float>(ray_count);
3144 } else {
3145 // Store r_bar * ray_count so that getVoxelRbar returns the correct value when it divides
3146 int ray_count = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3147 if (ray_count == 0) {
3148 ray_count = 1; // Set to 1 to avoid division by zero
3149 voxel_ray_counts[ijk.x][ijk.y][ijk.z] = 1;
3150 }
3151 voxel_path_lengths[ijk.x][ijk.y][ijk.z] = r_bar * static_cast<float>(ray_count);
3152 }
3153}
3154
3156 if (!validateVoxelIndices(ijk)) {
3157 helios_runtime_error("ERROR (CollisionDetection::getVoxelRbar): Invalid voxel indices");
3158 }
3159
3160 if (!voxel_data_initialized) {
3161 return 0.0f;
3162 }
3163
3164 if (use_flat_arrays) {
3165 size_t flat_idx = flatIndex(ijk);
3166 int ray_count = voxel_ray_counts_flat[flat_idx];
3167 if (ray_count == 0) {
3168 return 0.0f;
3169 }
3170 // If this was set directly via setVoxelRbar, return it as-is
3171 // If this accumulated from ray calculations, compute the average
3172 return voxel_path_lengths_flat[flat_idx] / static_cast<float>(ray_count);
3173 } else {
3174 int ray_count = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3175 if (ray_count == 0) {
3176 return 0.0f;
3177 }
3178 // If this was set directly via setVoxelRbar, return it as-is
3179 // If this accumulated from ray calculations, compute the average
3180 return voxel_path_lengths[ijk.x][ijk.y][ijk.z] / static_cast<float>(ray_count);
3181 }
3182}
3183
3184void CollisionDetection::getVoxelRayHitCounts(const helios::int3 &ijk, int &hit_before, int &hit_after, int &hit_inside) const {
3185 if (!validateVoxelIndices(ijk)) {
3186 helios_runtime_error("ERROR (CollisionDetection::getVoxelRayHitCounts): Invalid voxel indices");
3187 }
3188
3189 if (!voxel_data_initialized) {
3190 hit_before = 0;
3191 hit_after = 0;
3192 hit_inside = 0;
3193 return;
3194 }
3195
3196 if (use_flat_arrays) {
3197 size_t flat_idx = flatIndex(ijk);
3198 hit_before = voxel_hit_before_flat[flat_idx];
3199 hit_after = voxel_hit_after_flat[flat_idx];
3200 hit_inside = voxel_hit_inside_flat[flat_idx];
3201 } else {
3202 hit_before = voxel_hit_before[ijk.x][ijk.y][ijk.z];
3203 hit_after = voxel_hit_after[ijk.x][ijk.y][ijk.z];
3204 hit_inside = voxel_hit_inside[ijk.x][ijk.y][ijk.z];
3205 }
3206}
3207
3208std::vector<float> CollisionDetection::getVoxelRayPathLengths(const helios::int3 &ijk) const {
3209 if (!validateVoxelIndices(ijk)) {
3210 helios_runtime_error("ERROR (CollisionDetection::getVoxelRayPathLengths): Invalid voxel indices");
3211 }
3212
3213 if (!voxel_data_initialized) {
3214 return std::vector<float>();
3215 }
3216
3217 if (use_flat_arrays) {
3218 // Use flat array structure with offsets
3219 size_t flat_idx = flatIndex(ijk);
3220
3221 // Bounds checking for flat array access
3222 if (flat_idx >= voxel_individual_path_offsets.size() || flat_idx >= voxel_individual_path_counts.size()) {
3223 return std::vector<float>();
3224 }
3225
3226 size_t offset = voxel_individual_path_offsets[flat_idx];
3227 size_t count = voxel_individual_path_counts[flat_idx];
3228
3229 // Additional bounds checking for the data array
3230 if (count == 0 || offset + count > voxel_individual_path_lengths_flat.size()) {
3231 return std::vector<float>();
3232 }
3233
3234 std::vector<float> result;
3235 result.reserve(count);
3236
3237 for (size_t i = 0; i < count; ++i) {
3238 result.push_back(voxel_individual_path_lengths_flat[offset + i]);
3239 }
3240
3241 return result;
3242 } else {
3243 return voxel_individual_path_lengths[ijk.x][ijk.y][ijk.z];
3244 }
3245}
3246
3248 voxel_ray_counts.clear();
3249 voxel_transmitted.clear();
3250 voxel_path_lengths.clear();
3251 voxel_hit_before.clear();
3252 voxel_hit_after.clear();
3253 voxel_hit_inside.clear();
3254 voxel_individual_path_lengths.clear();
3255 voxel_data_initialized = false;
3256
3257 if (printmessages) {
3258 std::cout << "Voxel data cleared." << std::endl;
3259 }
3260}
3261
3262// -------- VOXEL RAY PATH LENGTH HELPER METHODS --------
3263
3264void CollisionDetection::initializeVoxelData(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions) {
3265
3266 // Check if we need to reinitialize (grid parameters changed)
3267 bool need_reinit = !voxel_data_initialized || (grid_center - voxel_grid_center).magnitude() > 1e-6 || (grid_size - voxel_grid_size).magnitude() > 1e-6 || grid_divisions.x != voxel_grid_divisions.x || grid_divisions.y != voxel_grid_divisions.y ||
3268 grid_divisions.z != voxel_grid_divisions.z;
3269
3270 if (!need_reinit) {
3271 // Just clear existing data but keep structure
3272 if (use_flat_arrays) {
3273 // Clear flat arrays
3274 size_t total_voxels = static_cast<size_t>(grid_divisions.x) * grid_divisions.y * grid_divisions.z;
3275 std::fill(voxel_ray_counts_flat.begin(), voxel_ray_counts_flat.end(), 0);
3276 std::fill(voxel_transmitted_flat.begin(), voxel_transmitted_flat.end(), 0);
3277 std::fill(voxel_path_lengths_flat.begin(), voxel_path_lengths_flat.end(), 0.0f);
3278 std::fill(voxel_hit_before_flat.begin(), voxel_hit_before_flat.end(), 0);
3279 std::fill(voxel_hit_after_flat.begin(), voxel_hit_after_flat.end(), 0);
3280 std::fill(voxel_hit_inside_flat.begin(), voxel_hit_inside_flat.end(), 0);
3281
3282 // Clear individual path lengths
3283 voxel_individual_path_lengths_flat.clear();
3284 std::fill(voxel_individual_path_offsets.begin(), voxel_individual_path_offsets.end(), 0);
3285 std::fill(voxel_individual_path_counts.begin(), voxel_individual_path_counts.end(), 0);
3286 } else {
3287 // Clear nested vectors
3288 for (int i = 0; i < grid_divisions.x; i++) {
3289 for (int j = 0; j < grid_divisions.y; j++) {
3290 for (int k = 0; k < grid_divisions.z; k++) {
3291 voxel_ray_counts[i][j][k] = 0;
3292 voxel_transmitted[i][j][k] = 0;
3293 voxel_path_lengths[i][j][k] = 0.0f;
3294 voxel_hit_before[i][j][k] = 0;
3295 voxel_hit_after[i][j][k] = 0;
3296 voxel_hit_inside[i][j][k] = 0;
3297 voxel_individual_path_lengths[i][j][k].clear();
3298 }
3299 }
3300 }
3301 }
3302 return;
3303 }
3304
3305 // Store grid parameters
3306 voxel_grid_center = grid_center;
3307 voxel_grid_size = grid_size;
3308 voxel_grid_divisions = grid_divisions;
3309
3310 // Enable flat arrays for better performance
3311 use_flat_arrays = true;
3312
3313 if (use_flat_arrays) {
3314 // Initialize optimized flat arrays (Structure-of-Arrays)
3315 size_t total_voxels = static_cast<size_t>(grid_divisions.x) * grid_divisions.y * grid_divisions.z;
3316
3317 voxel_ray_counts_flat.assign(total_voxels, 0);
3318 voxel_transmitted_flat.assign(total_voxels, 0);
3319 voxel_path_lengths_flat.assign(total_voxels, 0.0f);
3320 voxel_hit_before_flat.assign(total_voxels, 0);
3321 voxel_hit_after_flat.assign(total_voxels, 0);
3322 voxel_hit_inside_flat.assign(total_voxels, 0);
3323
3324 // Individual path lengths with dynamic storage
3325 voxel_individual_path_lengths_flat.clear();
3326 voxel_individual_path_offsets.assign(total_voxels, 0);
3327 voxel_individual_path_counts.assign(total_voxels, 0);
3328
3329 // Reserve reasonable initial capacity for individual paths
3330 voxel_individual_path_lengths_flat.reserve(total_voxels * 10); // Average 10 paths per voxel
3331
3332 } else {
3333 // Fallback to nested vectors for compatibility
3334 voxel_ray_counts.resize(grid_divisions.x);
3335 voxel_transmitted.resize(grid_divisions.x);
3336 voxel_path_lengths.resize(grid_divisions.x);
3337 voxel_hit_before.resize(grid_divisions.x);
3338 voxel_hit_after.resize(grid_divisions.x);
3339 voxel_hit_inside.resize(grid_divisions.x);
3340 voxel_individual_path_lengths.resize(grid_divisions.x);
3341
3342 for (int i = 0; i < grid_divisions.x; i++) {
3343 voxel_ray_counts[i].resize(grid_divisions.y);
3344 voxel_transmitted[i].resize(grid_divisions.y);
3345 voxel_path_lengths[i].resize(grid_divisions.y);
3346 voxel_hit_before[i].resize(grid_divisions.y);
3347 voxel_hit_after[i].resize(grid_divisions.y);
3348 voxel_hit_inside[i].resize(grid_divisions.y);
3349 voxel_individual_path_lengths[i].resize(grid_divisions.y);
3350
3351 for (int j = 0; j < grid_divisions.y; j++) {
3352 voxel_ray_counts[i][j].resize(grid_divisions.z, 0);
3353 voxel_transmitted[i][j].resize(grid_divisions.z, 0);
3354 voxel_path_lengths[i][j].resize(grid_divisions.z, 0.0f);
3355 voxel_hit_before[i][j].resize(grid_divisions.z, 0);
3356 voxel_hit_after[i][j].resize(grid_divisions.z, 0);
3357 voxel_hit_inside[i][j].resize(grid_divisions.z, 0);
3358 voxel_individual_path_lengths[i][j].resize(grid_divisions.z);
3359 }
3360 }
3361 }
3362
3363 voxel_data_initialized = true;
3364}
3365
3366bool CollisionDetection::validateVoxelIndices(const helios::int3 &ijk) const {
3367 return (ijk.x >= 0 && ijk.x < voxel_grid_divisions.x && ijk.y >= 0 && ijk.y < voxel_grid_divisions.y && ijk.z >= 0 && ijk.z < voxel_grid_divisions.z);
3368}
3369
3370void CollisionDetection::calculateVoxelAABB(const helios::int3 &ijk, vec3 &voxel_min, vec3 &voxel_max) const {
3371 vec3 voxel_size = make_vec3(voxel_grid_size.x / static_cast<float>(voxel_grid_divisions.x), voxel_grid_size.y / static_cast<float>(voxel_grid_divisions.y), voxel_grid_size.z / static_cast<float>(voxel_grid_divisions.z));
3372
3373 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3374
3375 voxel_min = grid_min + make_vec3(static_cast<float>(ijk.x) * voxel_size.x, static_cast<float>(ijk.y) * voxel_size.y, static_cast<float>(ijk.z) * voxel_size.z);
3376
3377 voxel_max = voxel_min + voxel_size;
3378}
3379
3380std::vector<std::pair<helios::int3, float>> CollisionDetection::traverseVoxelGrid(const vec3 &ray_origin, const vec3 &ray_direction) const {
3381 std::vector<std::pair<helios::int3, float>> traversed_voxels;
3382
3383 // Grid bounds
3384 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3385 vec3 grid_max = voxel_grid_center + 0.5f * voxel_grid_size;
3386 vec3 voxel_size = make_vec3(voxel_grid_size.x / static_cast<float>(voxel_grid_divisions.x), voxel_grid_size.y / static_cast<float>(voxel_grid_divisions.y), voxel_grid_size.z / static_cast<float>(voxel_grid_divisions.z));
3387
3388 // Test if ray intersects grid at all
3389 float t_grid_min, t_grid_max;
3390 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max)) {
3391 return traversed_voxels; // Empty - ray doesn't hit grid
3392 }
3393
3394 // Ensure intersection is in forward direction
3395 if (t_grid_max <= 1e-6) {
3396 return traversed_voxels; // Grid is behind ray
3397 }
3398
3399 // Clamp t_grid_min to 0 if ray starts inside grid
3400 t_grid_min = std::max(0.0f, t_grid_min);
3401
3402 // Fast path for single voxel grids
3403 if (voxel_grid_divisions.x == 1 && voxel_grid_divisions.y == 1 && voxel_grid_divisions.z == 1) {
3404 float path_length = t_grid_max - t_grid_min;
3405 if (path_length > 1e-6f) {
3406 traversed_voxels.emplace_back(helios::make_int3(0, 0, 0), path_length);
3407 }
3408 return traversed_voxels;
3409 }
3410
3411 // Starting position in grid space
3412 vec3 start_pos = ray_origin + t_grid_min * ray_direction;
3413
3414 // Convert to voxel indices (clamped to grid bounds)
3415 helios::int3 current_voxel;
3416 current_voxel.x = static_cast<int>(std::floor((start_pos.x - grid_min.x) / voxel_size.x));
3417 current_voxel.y = static_cast<int>(std::floor((start_pos.y - grid_min.y) / voxel_size.y));
3418 current_voxel.z = static_cast<int>(std::floor((start_pos.z - grid_min.z) / voxel_size.z));
3419
3420 // Clamp to grid bounds
3421 current_voxel.x = std::max(0, std::min(current_voxel.x, voxel_grid_divisions.x - 1));
3422 current_voxel.y = std::max(0, std::min(current_voxel.y, voxel_grid_divisions.y - 1));
3423 current_voxel.z = std::max(0, std::min(current_voxel.z, voxel_grid_divisions.z - 1));
3424
3425 // DDA algorithm parameters
3426 helios::int3 step;
3427 vec3 t_delta, t_max;
3428
3429 // Set up stepping direction and delta t values
3430 for (int i = 0; i < 3; i++) {
3431 float dir_comp = (i == 0) ? ray_direction.x : (i == 1) ? ray_direction.y : ray_direction.z;
3432 float size_comp = (i == 0) ? voxel_size.x : (i == 1) ? voxel_size.y : voxel_size.z;
3433 float grid_min_comp = (i == 0) ? grid_min.x : (i == 1) ? grid_min.y : grid_min.z;
3434 float start_comp = (i == 0) ? start_pos.x : (i == 1) ? start_pos.y : start_pos.z;
3435 int current_comp = (i == 0) ? current_voxel.x : (i == 1) ? current_voxel.y : current_voxel.z;
3436 int max_comp = (i == 0) ? voxel_grid_divisions.x : (i == 1) ? voxel_grid_divisions.y : voxel_grid_divisions.z;
3437
3438 if (std::abs(dir_comp) < 1e-8f) {
3439 // Ray is parallel to this axis
3440 if (i == 0) {
3441 step.x = 0;
3442 t_delta.x = 1e30f; // Large value
3443 t_max.x = 1e30f;
3444 } else if (i == 1) {
3445 step.y = 0;
3446 t_delta.y = 1e30f;
3447 t_max.y = 1e30f;
3448 } else {
3449 step.z = 0;
3450 t_delta.z = 1e30f;
3451 t_max.z = 1e30f;
3452 }
3453 } else {
3454 // Calculate step direction and delta t
3455 if (i == 0) {
3456 step.x = (dir_comp > 0) ? 1 : -1;
3457 t_delta.x = std::abs(size_comp / dir_comp);
3458
3459 if (step.x > 0) {
3460 t_max.x = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3461 } else {
3462 t_max.x = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3463 }
3464 } else if (i == 1) {
3465 step.y = (dir_comp > 0) ? 1 : -1;
3466 t_delta.y = std::abs(size_comp / dir_comp);
3467
3468 if (step.y > 0) {
3469 t_max.y = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3470 } else {
3471 t_max.y = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3472 }
3473 } else {
3474 step.z = (dir_comp > 0) ? 1 : -1;
3475 t_delta.z = std::abs(size_comp / dir_comp);
3476
3477 if (step.z > 0) {
3478 t_max.z = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3479 } else {
3480 t_max.z = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3481 }
3482 }
3483 }
3484 }
3485
3486 // Traverse the grid
3487 float current_t = t_grid_min;
3488
3489 while (validateVoxelIndices(current_voxel) && current_t < t_grid_max) {
3490 // Calculate path length through this voxel
3491 float next_t = std::min({t_max.x, t_max.y, t_max.z, t_grid_max});
3492 float path_length = next_t - current_t;
3493
3494 if (path_length > 1e-6f) {
3495 traversed_voxels.emplace_back(current_voxel, path_length);
3496 }
3497
3498 // Move to next voxel
3499 if (next_t >= t_grid_max) {
3500 break; // Reached end of grid
3501 }
3502
3503 // Determine which axis to step along
3504 if (t_max.x <= t_max.y && t_max.x <= t_max.z) {
3505 current_voxel.x += step.x;
3506 t_max.x += t_delta.x;
3507 } else if (t_max.y <= t_max.z) {
3508 current_voxel.y += step.y;
3509 t_max.y += t_delta.y;
3510 } else {
3511 current_voxel.z += step.z;
3512 t_max.z += t_delta.z;
3513 }
3514
3515 current_t = next_t;
3516 }
3517
3518 return traversed_voxels;
3519}
3520
3521void CollisionDetection::calculateVoxelRayPathLengths_CPU(const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3522
3523
3524 auto start_time = std::chrono::high_resolution_clock::now();
3525
3526 // Performance profiling variables
3527 std::atomic<long long> total_raycast_time(0);
3528 std::atomic<int> raycast_count(0);
3529
3530 // NOTE: BVH currency should be ensured by caller before calling this function
3531 // to avoid rebuilding BVH on every batch of rays
3532
3533 const int num_rays = static_cast<int>(ray_origins.size());
3534
3535 // PERFORMANCE OPTIMIZATION: Use thread-local storage to eliminate atomic operations
3536 const int total_voxels = voxel_grid_divisions.x * voxel_grid_divisions.y * voxel_grid_divisions.z;
3537
3538 // Pre-compute grid bounds for early culling
3539 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3540 vec3 grid_max = voxel_grid_center + 0.5f * voxel_grid_size;
3541
3542#ifdef _OPENMP
3543 const int num_threads = omp_get_max_threads();
3544
3545 // Thread-local accumulation arrays to eliminate atomic operations
3546 std::vector<std::vector<int>> thread_ray_counts(num_threads, std::vector<int>(total_voxels, 0));
3547 std::vector<std::vector<float>> thread_path_lengths(num_threads, std::vector<float>(total_voxels, 0.0f));
3548 std::vector<std::vector<int>> thread_hit_before(num_threads, std::vector<int>(total_voxels, 0));
3549 std::vector<std::vector<int>> thread_hit_after(num_threads, std::vector<int>(total_voxels, 0));
3550 std::vector<std::vector<int>> thread_hit_inside(num_threads, std::vector<int>(total_voxels, 0));
3551 std::vector<std::vector<int>> thread_transmitted(num_threads, std::vector<int>(total_voxels, 0));
3552 std::vector<std::vector<std::vector<float>>> thread_individual_paths(num_threads, std::vector<std::vector<float>>(total_voxels));
3553
3554// Use OpenMP for parallel processing with thread-local accumulation
3555#pragma omp parallel for schedule(dynamic)
3556 for (int ray_idx = 0; ray_idx < num_rays; ray_idx++) {
3557 const int thread_id = omp_get_thread_num();
3558 const vec3 &ray_origin = ray_origins[ray_idx];
3559 const vec3 &ray_direction = ray_directions[ray_idx];
3560
3561 // Early culling: Skip rays that don't intersect the grid at all
3562 float t_grid_min, t_grid_max;
3563 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max) || t_grid_max <= 1e-6) {
3564 continue; // Skip this ray entirely
3565 }
3566
3567 // Use DDA traversal to get only intersected voxels
3568 auto traversed_voxels = traverseVoxelGrid(ray_origin, ray_direction);
3569
3570 // Only perform expensive ray classification for rays that actually intersect the grid
3571 if (traversed_voxels.empty()) {
3572 continue; // Skip rays that don't traverse any voxels
3573 }
3574
3575 // Perform ray classification once per ray (outside voxel loop for efficiency)
3576 auto raycast_start = std::chrono::high_resolution_clock::now();
3577 RayQuery query(ray_origin, ray_direction, -1.0f, {});
3578 HitResult hit = castRay(query);
3579 auto raycast_end = std::chrono::high_resolution_clock::now();
3580
3581 // Profile raycast performance
3582 total_raycast_time += std::chrono::duration_cast<std::chrono::microseconds>(raycast_end - raycast_start).count();
3583 raycast_count++;
3584
3585 float hit_distance = hit.hit ? hit.distance : 1e30f; // Large value if no hit
3586
3587 // Process each intersected voxel
3588 for (const auto &voxel_data: traversed_voxels) {
3589 const helios::int3 &voxel_idx = voxel_data.first;
3590 float path_length = voxel_data.second;
3591
3592 // Calculate voxel bounds for ray classification
3593 vec3 voxel_min, voxel_max;
3594 calculateVoxelAABB(voxel_idx, voxel_min, voxel_max);
3595
3596 // Get t_min and t_max for this voxel
3597 float t_min, t_max;
3598 rayAABBIntersect(ray_origin, ray_direction, voxel_min, voxel_max, t_min, t_max);
3599
3600 // Ensure valid intersection
3601 if (t_min < 0)
3602 t_min = 0;
3603
3604 // Perform ray classification for Beer's law calculations
3605 bool hit_before = false;
3606 bool hit_after = false;
3607 bool hit_inside = false;
3608
3609 if (hit.hit) {
3610 // Classify the hit based on where it occurs relative to voxel
3611 if (hit_distance < t_min) {
3612 // Hit occurs before entering voxel
3613 hit_before = true;
3614 } else if (hit_distance >= t_min && hit_distance <= t_max) {
3615 // Hit occurs inside voxel
3616 hit_inside = true;
3617 hit_after = true; // Also count as hit_after since it's after entering
3618 } else {
3619 // Hit occurs after exiting voxel
3620 hit_after = true;
3621 }
3622 } else {
3623 // No geometry hit - ray is transmitted through entire scene
3624 hit_after = true; // Consider this as reaching the voxel
3625 }
3626
3627 // PERFORMANCE OPTIMIZATION: Use thread-local accumulation (no synchronization!)
3628 size_t flat_idx = flatIndex(voxel_idx);
3629
3630 // All operations are now thread-local - no atomic operations needed!
3631 thread_ray_counts[thread_id][flat_idx]++;
3632 thread_path_lengths[thread_id][flat_idx] += path_length;
3633
3634 if (hit_before) {
3635 thread_hit_before[thread_id][flat_idx]++;
3636 }
3637 if (hit_after) {
3638 thread_hit_after[thread_id][flat_idx]++;
3639 }
3640 if (hit_inside) {
3641 thread_hit_inside[thread_id][flat_idx]++;
3642 } else {
3643 thread_transmitted[thread_id][flat_idx]++;
3644 }
3645
3646 // Store individual path lengths in thread-local storage (no critical section!)
3647 thread_individual_paths[thread_id][flat_idx].push_back(path_length);
3648 }
3649 }
3650
3651 // PERFORMANCE OPTIMIZATION: Reduction phase - combine thread-local results
3652 // This single reduction eliminates hundreds of thousands of atomic operations!
3653 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3654 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3655 voxel_ray_counts_flat[voxel_idx] += thread_ray_counts[thread_id][voxel_idx];
3656 voxel_path_lengths_flat[voxel_idx] += thread_path_lengths[thread_id][voxel_idx];
3657 voxel_hit_before_flat[voxel_idx] += thread_hit_before[thread_id][voxel_idx];
3658 voxel_hit_after_flat[voxel_idx] += thread_hit_after[thread_id][voxel_idx];
3659 voxel_hit_inside_flat[voxel_idx] += thread_hit_inside[thread_id][voxel_idx];
3660 voxel_transmitted_flat[voxel_idx] += thread_transmitted[thread_id][voxel_idx];
3661 }
3662 }
3663
3664 // Post-process for flat array storage - aggregate individual paths from thread-local storage
3665 if (use_flat_arrays) {
3666 // Consolidate individual path lengths into flat storage with proper offsets
3667 voxel_individual_path_lengths_flat.clear();
3668
3669 // Calculate total size needed from all threads
3670 size_t total_paths = 0;
3671 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3672 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3673 total_paths += thread_individual_paths[thread_id][voxel_idx].size();
3674 }
3675 }
3676 voxel_individual_path_lengths_flat.reserve(total_paths);
3677
3678 // Build flat array and offsets by aggregating from all threads
3679 size_t current_offset = 0;
3680 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3681 voxel_individual_path_offsets[voxel_idx] = current_offset;
3682 size_t voxel_path_count = 0;
3683
3684 // Aggregate paths from all threads for this voxel
3685 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3686 for (float path_length: thread_individual_paths[thread_id][voxel_idx]) {
3687 voxel_individual_path_lengths_flat.push_back(path_length);
3688 voxel_path_count++;
3689 }
3690 }
3691
3692 voxel_individual_path_counts[voxel_idx] = voxel_path_count;
3693 current_offset += voxel_path_count;
3694 }
3695 }
3696#else
3697 // Serial fallback when OpenMP is not available
3698 const int num_threads = 1;
3699 const int thread_id = 0;
3700
3701 // Direct accumulation arrays for serial processing
3702 std::vector<int> serial_ray_counts(total_voxels, 0);
3703 std::vector<float> serial_path_lengths(total_voxels, 0.0f);
3704 std::vector<int> serial_hit_before(total_voxels, 0);
3705 std::vector<int> serial_hit_after(total_voxels, 0);
3706 std::vector<int> serial_hit_inside(total_voxels, 0);
3707 std::vector<int> serial_transmitted(total_voxels, 0);
3708 std::vector<std::vector<float>> serial_individual_paths(total_voxels);
3709
3710 // Serial processing without OpenMP pragmas
3711 for (int ray_idx = 0; ray_idx < num_rays; ray_idx++) {
3712 const vec3 &ray_origin = ray_origins[ray_idx];
3713 const vec3 &ray_direction = ray_directions[ray_idx];
3714
3715 // Early culling: Skip rays that don't intersect the grid at all
3716 float t_grid_min, t_grid_max;
3717 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max) || t_grid_max <= 1e-6) {
3718 continue; // Skip this ray entirely
3719 }
3720
3721 // Use DDA traversal to get only intersected voxels
3722 auto traversed_voxels = traverseVoxelGrid(ray_origin, ray_direction);
3723
3724 // Only perform expensive ray classification for rays that actually intersect the grid
3725 if (traversed_voxels.empty()) {
3726 continue; // Skip rays that don't traverse any voxels
3727 }
3728
3729 // Perform ray classification once per ray (outside voxel loop for efficiency)
3730 auto raycast_start = std::chrono::high_resolution_clock::now();
3731 RayQuery query(ray_origin, ray_direction, -1.0f, {});
3732 HitResult hit = castRay(query);
3733 auto raycast_end = std::chrono::high_resolution_clock::now();
3734
3735 // Profile raycast performance
3736 total_raycast_time += std::chrono::duration_cast<std::chrono::microseconds>(raycast_end - raycast_start).count();
3737 raycast_count++;
3738
3739 float hit_distance = hit.hit ? hit.distance : 1e30f; // Large value if no hit
3740
3741 // Process each intersected voxel
3742 for (const auto &voxel_data: traversed_voxels) {
3743 const helios::int3 &voxel_idx = voxel_data.first;
3744 float path_length = voxel_data.second;
3745
3746 // Calculate voxel bounds for ray classification
3747 vec3 voxel_min, voxel_max;
3748 calculateVoxelAABB(voxel_idx, voxel_min, voxel_max);
3749
3750 // Get t_min and t_max for this voxel
3751 float t_min, t_max;
3752 rayAABBIntersect(ray_origin, ray_direction, voxel_min, voxel_max, t_min, t_max);
3753
3754 // Ensure valid intersection
3755 if (t_min < 0)
3756 t_min = 0;
3757
3758 // Perform ray classification for Beer's law calculations
3759 bool hit_before = false;
3760 bool hit_after = false;
3761 bool hit_inside = false;
3762
3763 if (hit.hit) {
3764 // Classify the hit based on where it occurs relative to voxel
3765 if (hit_distance < t_min) {
3766 // Hit occurs before entering voxel
3767 hit_before = true;
3768 } else if (hit_distance >= t_min && hit_distance <= t_max) {
3769 // Hit occurs inside voxel
3770 hit_inside = true;
3771 hit_after = true; // Also count as hit_after since it's after entering
3772 } else {
3773 // Hit occurs after exiting voxel
3774 hit_after = true;
3775 }
3776 } else {
3777 // No geometry hit - ray is transmitted through entire scene
3778 hit_after = true; // Consider this as reaching the voxel
3779 }
3780
3781 // Direct accumulation for serial processing
3782 size_t flat_idx = flatIndex(voxel_idx);
3783
3784 serial_ray_counts[flat_idx]++;
3785 serial_path_lengths[flat_idx] += path_length;
3786
3787 if (hit_before) {
3788 serial_hit_before[flat_idx]++;
3789 }
3790 if (hit_after) {
3791 serial_hit_after[flat_idx]++;
3792 }
3793 if (hit_inside) {
3794 serial_hit_inside[flat_idx]++;
3795 } else {
3796 serial_transmitted[flat_idx]++;
3797 }
3798
3799 // Store individual path lengths for serial processing
3800 serial_individual_paths[flat_idx].push_back(path_length);
3801 }
3802 }
3803
3804 // Direct assignment from serial arrays to final storage
3805 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3806 voxel_ray_counts_flat[voxel_idx] += serial_ray_counts[voxel_idx];
3807 voxel_path_lengths_flat[voxel_idx] += serial_path_lengths[voxel_idx];
3808 voxel_hit_before_flat[voxel_idx] += serial_hit_before[voxel_idx];
3809 voxel_hit_after_flat[voxel_idx] += serial_hit_after[voxel_idx];
3810 voxel_hit_inside_flat[voxel_idx] += serial_hit_inside[voxel_idx];
3811 voxel_transmitted_flat[voxel_idx] += serial_transmitted[voxel_idx];
3812 }
3813
3814 // Post-process for flat array storage - aggregate individual paths from serial storage
3815 if (use_flat_arrays) {
3816 // Consolidate individual path lengths into flat storage with proper offsets
3817 voxel_individual_path_lengths_flat.clear();
3818
3819 // Calculate total size needed
3820 size_t total_paths = 0;
3821 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3822 total_paths += serial_individual_paths[voxel_idx].size();
3823 }
3824 voxel_individual_path_lengths_flat.reserve(total_paths);
3825
3826 // Build flat array and offsets
3827 size_t current_offset = 0;
3828 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3829 voxel_individual_path_offsets[voxel_idx] = current_offset;
3830 size_t voxel_path_count = serial_individual_paths[voxel_idx].size();
3831
3832 // Copy paths to flat array
3833 for (float path_length: serial_individual_paths[voxel_idx]) {
3834 voxel_individual_path_lengths_flat.push_back(path_length);
3835 }
3836
3837 voxel_individual_path_counts[voxel_idx] = voxel_path_count;
3838 current_offset += voxel_path_count;
3839 }
3840 }
3841#endif
3842
3843 auto end_time = std::chrono::high_resolution_clock::now();
3844 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
3845
3846 // Performance profiling output
3847 long long avg_raycast_time = raycast_count > 0 ? total_raycast_time.load() / raycast_count.load() : 0;
3848 //
3849 // if (printmessages) {
3850 // // Report some statistics
3851 // int total_ray_voxel_intersections = 0;
3852 // if (use_flat_arrays) {
3853 // // Sum from flat arrays
3854 // for (size_t i = 0; i < voxel_ray_counts_flat.size(); i++) {
3855 // total_ray_voxel_intersections += voxel_ray_counts_flat[i];
3856 // }
3857 // } else {
3858 // // Sum from nested vectors
3859 // for (int i = 0; i < voxel_grid_divisions.x; i++) {
3860 // for (int j = 0; j < voxel_grid_divisions.y; j++) {
3861 // for (int k = 0; k < voxel_grid_divisions.z; k++) {
3862 // total_ray_voxel_intersections += voxel_ray_counts[i][j][k];
3863 // }
3864 // }
3865 // }
3866 // }
3867 // }
3868}
3869
3870#ifdef HELIOS_CUDA_AVAILABLE
3871void CollisionDetection::calculateVoxelRayPathLengths_GPU(const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3872 if (printmessages) {
3873 }
3874
3875 auto start_time = std::chrono::high_resolution_clock::now();
3876
3877 const int num_rays = static_cast<int>(ray_origins.size());
3878 const int total_voxels = voxel_grid_divisions.x * voxel_grid_divisions.y * voxel_grid_divisions.z;
3879
3880 // Prepare data for GPU kernel
3881 std::vector<float> h_ray_origins(num_rays * 3);
3882 std::vector<float> h_ray_directions(num_rays * 3);
3883 std::vector<int> h_voxel_ray_counts(total_voxels, 0);
3884 std::vector<float> h_voxel_path_lengths(total_voxels, 0.0f);
3885 std::vector<int> h_voxel_transmitted(total_voxels, 0);
3886 std::vector<int> h_voxel_hit_before(total_voxels, 0);
3887 std::vector<int> h_voxel_hit_after(total_voxels, 0);
3888 std::vector<int> h_voxel_hit_inside(total_voxels, 0);
3889
3890 // Convert ray data to flat arrays
3891 for (int i = 0; i < num_rays; i++) {
3892 h_ray_origins[i * 3 + 0] = ray_origins[i].x;
3893 h_ray_origins[i * 3 + 1] = ray_origins[i].y;
3894 h_ray_origins[i * 3 + 2] = ray_origins[i].z;
3895
3896 h_ray_directions[i * 3 + 0] = ray_directions[i].x;
3897 h_ray_directions[i * 3 + 1] = ray_directions[i].y;
3898 h_ray_directions[i * 3 + 2] = ray_directions[i].z;
3899 }
3900
3901 // Check if there are any primitives in the scene for geometry detection
3902 int primitive_count = static_cast<int>(primitive_cache.size());
3903
3904 // Launch CUDA kernel
3905 launchVoxelRayPathLengths(num_rays, h_ray_origins.data(), h_ray_directions.data(), voxel_grid_center.x, voxel_grid_center.y, voxel_grid_center.z, voxel_grid_size.x, voxel_grid_size.y, voxel_grid_size.z, voxel_grid_divisions.x,
3906 voxel_grid_divisions.y, voxel_grid_divisions.z, primitive_count, h_voxel_ray_counts.data(), h_voxel_path_lengths.data(), h_voxel_transmitted.data(), h_voxel_hit_before.data(), h_voxel_hit_after.data(),
3907 h_voxel_hit_inside.data());
3908
3909 // Copy results back to class data structures
3910 if (use_flat_arrays) {
3911 // Copy directly to flat arrays
3912 voxel_ray_counts_flat = h_voxel_ray_counts;
3913 voxel_path_lengths_flat = h_voxel_path_lengths;
3914 voxel_transmitted_flat = h_voxel_transmitted;
3915
3916 // Copy hit classification data from GPU kernel
3917 voxel_hit_before_flat = h_voxel_hit_before;
3918 voxel_hit_after_flat = h_voxel_hit_after;
3919 voxel_hit_inside_flat = h_voxel_hit_inside;
3920
3921 // Initialize individual path length data structures
3922 // Note: GPU implementation currently provides aggregate data only
3923 // For now, initialize empty individual path data to prevent crashes
3924 voxel_individual_path_lengths_flat.clear();
3925 std::fill(voxel_individual_path_offsets.begin(), voxel_individual_path_offsets.end(), 0);
3926 std::fill(voxel_individual_path_counts.begin(), voxel_individual_path_counts.end(), 0);
3927
3928 // TODO: Implement proper individual path length collection in GPU kernel
3929 // For now, estimate individual path lengths based on ray geometry
3930 // This is a workaround until the GPU kernel can provide individual path data
3931 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3932 voxel_individual_path_offsets[voxel_idx] = voxel_individual_path_lengths_flat.size();
3933
3934 if (h_voxel_ray_counts[voxel_idx] > 0) {
3935 // Calculate individual path lengths for each ray by simulating ray-voxel intersection
3936 // This is an approximation but more accurate than using just averages
3937 std::vector<float> estimated_paths;
3938
3939 // Convert flat voxel index back to 3D coordinates
3940 int voxel_z = voxel_idx % voxel_grid_divisions.z;
3941 int voxel_y = (voxel_idx / voxel_grid_divisions.z) % voxel_grid_divisions.y;
3942 int voxel_x = voxel_idx / (voxel_grid_divisions.y * voxel_grid_divisions.z);
3943
3944 // Calculate voxel bounds
3945 vec3 voxel_size = voxel_grid_size;
3946 voxel_size.x /= voxel_grid_divisions.x;
3947 voxel_size.y /= voxel_grid_divisions.y;
3948 voxel_size.z /= voxel_grid_divisions.z;
3949
3950 vec3 voxel_min = voxel_grid_center - voxel_grid_size * 0.5f;
3951 voxel_min.x += voxel_x * voxel_size.x;
3952 voxel_min.y += voxel_y * voxel_size.y;
3953 voxel_min.z += voxel_z * voxel_size.z;
3954
3955 vec3 voxel_max = voxel_min + voxel_size;
3956
3957 // Check each ray to see if it intersects this voxel and calculate path length
3958 for (int ray_idx = 0; ray_idx < num_rays; ++ray_idx) {
3959 vec3 ray_origin = ray_origins[ray_idx];
3960 vec3 ray_dir = ray_directions[ray_idx];
3961
3962 // Ray-box intersection algorithm
3963 float t_min = 0.0f;
3964 float t_max = std::numeric_limits<float>::max();
3965
3966 // Check intersection with each axis-aligned slab
3967 for (int axis = 0; axis < 3; ++axis) {
3968 float origin_comp = (axis == 0) ? ray_origin.x : (axis == 1) ? ray_origin.y : ray_origin.z;
3969 float dir_comp = (axis == 0) ? ray_dir.x : (axis == 1) ? ray_dir.y : ray_dir.z;
3970 float min_comp = (axis == 0) ? voxel_min.x : (axis == 1) ? voxel_min.y : voxel_min.z;
3971 float max_comp = (axis == 0) ? voxel_max.x : (axis == 1) ? voxel_max.y : voxel_max.z;
3972
3973 if (std::abs(dir_comp) < 1e-9f) {
3974 // Ray is parallel to slab
3975 if (origin_comp < min_comp || origin_comp > max_comp) {
3976 t_max = -1.0f; // No intersection
3977 break;
3978 }
3979 } else {
3980 float t1 = (min_comp - origin_comp) / dir_comp;
3981 float t2 = (max_comp - origin_comp) / dir_comp;
3982
3983 if (t1 > t2)
3984 std::swap(t1, t2);
3985
3986 t_min = std::max(t_min, t1);
3987 t_max = std::min(t_max, t2);
3988
3989 if (t_min > t_max)
3990 break; // No intersection
3991 }
3992 }
3993
3994 // If there's a valid intersection, calculate path length
3995 if (t_max > t_min && t_max > 0.0f) {
3996 float entry_t = std::max(0.0f, t_min);
3997 float exit_t = t_max;
3998 float path_length = exit_t - entry_t;
3999
4000 if (path_length > 1e-6f) {
4001 estimated_paths.push_back(path_length);
4002 }
4003 }
4004 }
4005
4006 // Store the estimated paths
4007 for (float path_length: estimated_paths) {
4008 voxel_individual_path_lengths_flat.push_back(path_length);
4009 }
4010 voxel_individual_path_counts[voxel_idx] = estimated_paths.size();
4011 } else {
4012 voxel_individual_path_counts[voxel_idx] = 0;
4013 }
4014 }
4015
4016 } else {
4017 // Copy to nested vectors
4018 for (int i = 0; i < voxel_grid_divisions.x; i++) {
4019 for (int j = 0; j < voxel_grid_divisions.y; j++) {
4020 for (int k = 0; k < voxel_grid_divisions.z; k++) {
4021 int flat_idx = i * voxel_grid_divisions.y * voxel_grid_divisions.z + j * voxel_grid_divisions.z + k;
4022 voxel_ray_counts[i][j][k] = h_voxel_ray_counts[flat_idx];
4023 voxel_path_lengths[i][j][k] = h_voxel_path_lengths[flat_idx];
4024 voxel_transmitted[i][j][k] = h_voxel_transmitted[flat_idx];
4025
4026 // Copy hit classification data from GPU kernel
4027 voxel_hit_before[i][j][k] = h_voxel_hit_before[flat_idx];
4028 voxel_hit_after[i][j][k] = h_voxel_hit_after[flat_idx];
4029 voxel_hit_inside[i][j][k] = h_voxel_hit_inside[flat_idx];
4030
4031 // Initialize individual path lengths (approximation)
4032 voxel_individual_path_lengths[i][j][k].clear();
4033 if (h_voxel_ray_counts[flat_idx] > 0) {
4034 float avg_path_length = h_voxel_path_lengths[flat_idx] / h_voxel_ray_counts[flat_idx];
4035 for (int ray = 0; ray < h_voxel_ray_counts[flat_idx]; ++ray) {
4036 voxel_individual_path_lengths[i][j][k].push_back(avg_path_length);
4037 }
4038 }
4039 }
4040 }
4041 }
4042 }
4043
4044 auto end_time = std::chrono::high_resolution_clock::now();
4045 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
4046
4047 if (printmessages) {
4048
4049 // Report some statistics
4050 int total_ray_voxel_intersections = 0;
4051 for (const auto &count: h_voxel_ray_counts) {
4052 total_ray_voxel_intersections += count;
4053 }
4054 }
4055}
4056#endif
4057
4058void CollisionDetection::ensureOptimizedBVH() {
4059 // Convert standard BVH to Structure-of-Arrays format for optimal cache performance
4060 // Only rebuild if BVH structure has actually changed OR SoA is explicitly dirty
4061 if (soa_dirty || bvh_nodes_soa.node_count != bvh_nodes.size() || bvh_nodes_soa.aabb_mins.empty()) {
4062
4063 if (bvh_nodes.empty()) {
4064 bvh_nodes_soa.clear();
4065 bvh_nodes_soa.node_count = 0;
4066 return;
4067 }
4068
4069 size_t node_count = bvh_nodes.size();
4070
4071 // OPTIMIZATION 1: Pre-allocate all arrays to exact size (no reserve+push_back)
4072 bvh_nodes_soa.node_count = node_count;
4073 bvh_nodes_soa.aabb_mins.resize(node_count);
4074 bvh_nodes_soa.aabb_maxs.resize(node_count);
4075 bvh_nodes_soa.left_children.resize(node_count);
4076 bvh_nodes_soa.right_children.resize(node_count);
4077 bvh_nodes_soa.primitive_starts.resize(node_count);
4078 bvh_nodes_soa.primitive_counts.resize(node_count);
4079 bvh_nodes_soa.is_leaf_flags.resize(node_count);
4080
4081 // OPTIMIZATION 2: Direct assignment instead of push_back (much faster)
4082 for (size_t i = 0; i < node_count; ++i) {
4083 const BVHNode &node = bvh_nodes[i];
4084
4085 // Hot data: frequently accessed during traversal
4086 bvh_nodes_soa.aabb_mins[i] = node.aabb_min;
4087 bvh_nodes_soa.aabb_maxs[i] = node.aabb_max;
4088 bvh_nodes_soa.left_children[i] = node.left_child;
4089 bvh_nodes_soa.right_children[i] = node.right_child;
4090
4091 // Cold data: accessed less frequently
4092 bvh_nodes_soa.primitive_starts[i] = node.primitive_start;
4093 bvh_nodes_soa.primitive_counts[i] = node.primitive_count;
4094 bvh_nodes_soa.is_leaf_flags[i] = node.is_leaf ? 1 : 0;
4095 }
4096
4097 // Mark SoA as clean after successful conversion
4098 soa_dirty = false;
4099 }
4100}
4101
4102void CollisionDetection::updatePrimitiveAABBCache(uint uuid) {
4103 // Check if primitive exists
4104 if (!context->doesPrimitiveExist(uuid)) {
4105 return;
4106 }
4107
4108 try {
4109 // Get primitive vertices to compute AABB
4110 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
4111 if (vertices.empty()) {
4112 return;
4113 }
4114
4115 // Compute AABB from vertices
4116 vec3 aabb_min = vertices[0];
4117 vec3 aabb_max = vertices[0];
4118
4119 for (const vec3 &vertex: vertices) {
4120 aabb_min.x = std::min(aabb_min.x, vertex.x);
4121 aabb_min.y = std::min(aabb_min.y, vertex.y);
4122 aabb_min.z = std::min(aabb_min.z, vertex.z);
4123
4124 aabb_max.x = std::max(aabb_max.x, vertex.x);
4125 aabb_max.y = std::max(aabb_max.y, vertex.y);
4126 aabb_max.z = std::max(aabb_max.z, vertex.z);
4127 }
4128
4129 // Store in cache
4130 primitive_aabbs_cache[uuid] = std::make_pair(aabb_min, aabb_max);
4131
4132 } catch (const std::exception &e) {
4133 if (printmessages) {
4134 std::cerr << "Warning: Failed to cache AABB for primitive " << uuid << ": " << e.what() << std::endl;
4135 }
4136 }
4137}
4138
4139void CollisionDetection::optimizedRebuildBVH(const std::set<uint> &final_geometry) {
4140 // Convert to vector for buildBVH compatibility
4141 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
4142
4143 // Ensure all primitive AABBs are cached
4144 for (uint uuid: final_geometry) {
4145 if (primitive_aabbs_cache.find(uuid) == primitive_aabbs_cache.end()) {
4146 updatePrimitiveAABBCache(uuid);
4147 }
4148 }
4149
4150 if (printmessages) {
4151 std::cout << "Optimized rebuild with " << final_primitives.size() << " primitives (using cached AABBs)" << std::endl;
4152 }
4153
4154 // Use regular buildBVH but with pre-cached AABBs for efficiency
4155 buildBVH(final_primitives);
4156
4157 // Update tracking
4158 last_bvh_geometry = final_geometry;
4159 bvh_dirty = false;
4160 soa_dirty = true; // SoA needs rebuild after BVH change
4161}
4162
4163// -------- TREE-BASED BVH IMPLEMENTATION --------
4164
4165void CollisionDetection::enableTreeBasedBVH(float isolation_distance) {
4166 tree_based_bvh_enabled = true;
4167 tree_isolation_distance = isolation_distance;
4168}
4169
4171 tree_based_bvh_enabled = false;
4172 tree_bvh_map.clear();
4173 object_to_tree_map.clear();
4174}
4175
4177 return tree_based_bvh_enabled;
4178}
4179
4181 if (static_obstacle_primitives.empty() || obstacle_spatial_grid_initialized) {
4182 return;
4183 }
4184
4185 // Set cell size to be roughly the static obstacle distance for optimal performance
4186 obstacle_spatial_grid.cell_size = 20.0f; // Slightly larger than MAX_STATIC_OBSTACLE_DISTANCE
4187 obstacle_spatial_grid.grid_cells.clear();
4188
4189 // Insert each static obstacle into the spatial grid
4190 for (uint obstacle_prim: static_obstacle_primitives) {
4191 if (context->doesPrimitiveExist(obstacle_prim)) {
4192 helios::vec3 min_corner, max_corner;
4193 context->getPrimitiveBoundingBox(obstacle_prim, min_corner, max_corner);
4194 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4195
4196 int64_t grid_key = obstacle_spatial_grid.getGridKey(prim_center.x, prim_center.y);
4197 obstacle_spatial_grid.grid_cells[grid_key].push_back(obstacle_prim);
4198 }
4199 }
4200
4201 obstacle_spatial_grid_initialized = true;
4202}
4203
4204std::vector<uint> CollisionDetection::ObstacleSpatialGrid::getRelevantObstacles(const helios::vec3 &position, float radius) const {
4205 std::vector<uint> relevant_obstacles;
4206
4207 // Calculate the range of grid cells to search
4208 int32_t min_grid_x = static_cast<int32_t>(std::floor((position.x - radius) / cell_size));
4209 int32_t max_grid_x = static_cast<int32_t>(std::floor((position.x + radius) / cell_size));
4210 int32_t min_grid_y = static_cast<int32_t>(std::floor((position.y - radius) / cell_size));
4211 int32_t max_grid_y = static_cast<int32_t>(std::floor((position.y + radius) / cell_size));
4212
4213 // Search all relevant grid cells
4214 for (int32_t grid_x = min_grid_x; grid_x <= max_grid_x; ++grid_x) {
4215 for (int32_t grid_y = min_grid_y; grid_y <= max_grid_y; ++grid_y) {
4216 int64_t grid_key = (static_cast<int64_t>(grid_x) << 32) | static_cast<uint32_t>(grid_y);
4217
4218 auto cell_it = grid_cells.find(grid_key);
4219 if (cell_it != grid_cells.end()) {
4220 // Add all obstacles from this cell (could add distance filtering here if needed)
4221 relevant_obstacles.insert(relevant_obstacles.end(), cell_it->second.begin(), cell_it->second.end());
4222 }
4223 }
4224 }
4225
4226 return relevant_obstacles;
4227}
4228
4229void CollisionDetection::registerTree(uint tree_object_id, const std::vector<uint> &tree_primitives) {
4230 if (!tree_based_bvh_enabled) {
4231 if (printmessages) {
4232 std::cout << "WARNING: Tree registration ignored - tree-based BVH not enabled" << std::endl;
4233 }
4234 return;
4235 }
4236
4237 // Calculate tree spatial bounds
4238 vec3 tree_center(0, 0, 0);
4239 vec3 aabb_min(1e30f, 1e30f, 1e30f);
4240 vec3 aabb_max(-1e30f, -1e30f, -1e30f);
4241
4242 for (uint prim_uuid: tree_primitives) {
4243 if (context->doesPrimitiveExist(prim_uuid)) {
4244 vec3 prim_min, prim_max;
4245 context->getPrimitiveBoundingBox(prim_uuid, prim_min, prim_max);
4246
4247 aabb_min.x = std::min(aabb_min.x, prim_min.x);
4248 aabb_min.y = std::min(aabb_min.y, prim_min.y);
4249 aabb_min.z = std::min(aabb_min.z, prim_min.z);
4250
4251 aabb_max.x = std::max(aabb_max.x, prim_max.x);
4252 aabb_max.y = std::max(aabb_max.y, prim_max.y);
4253 aabb_max.z = std::max(aabb_max.z, prim_max.z);
4254 }
4255 }
4256
4257 tree_center = (aabb_min + aabb_max) * 0.5f;
4258 float tree_radius = (aabb_max - aabb_min).magnitude() * 0.5f;
4259
4260 // Create or update tree BVH entry
4261 TreeBVH &tree_bvh = tree_bvh_map[tree_object_id];
4262 tree_bvh.tree_object_id = tree_object_id;
4263 tree_bvh.tree_center = tree_center;
4264 tree_bvh.tree_radius = tree_radius;
4265
4266 // Update object-to-tree mapping
4267 for (uint prim_uuid: tree_primitives) {
4268 object_to_tree_map[prim_uuid] = tree_object_id;
4269 }
4270}
4271
4272void CollisionDetection::setStaticObstacles(const std::vector<uint> &obstacle_primitives) {
4273 static_obstacle_primitives.clear();
4274
4275 // Validate and store static obstacles
4276 for (uint prim_uuid: obstacle_primitives) {
4277 if (context->doesPrimitiveExist(prim_uuid)) {
4278 static_obstacle_primitives.push_back(prim_uuid);
4279 }
4280 }
4281
4282 // Initialize spatial grid for fast obstacle lookup
4283 obstacle_spatial_grid_initialized = false;
4285}
4286
4287std::vector<uint> CollisionDetection::getRelevantGeometryForTree(const helios::vec3 &query_position, const std::vector<uint> &query_primitives, float max_distance) {
4288 std::vector<uint> relevant_geometry;
4289
4290 if (!tree_based_bvh_enabled) {
4291 // If tree-based BVH is disabled, return empty to signal using all geometry
4292 return relevant_geometry;
4293 }
4294
4295 // Use spatial grid for fast static obstacle lookup if available
4296 const float MAX_STATIC_OBSTACLE_DISTANCE = max_distance; // Use collision detection distance for static obstacles
4297
4298 if (obstacle_spatial_grid_initialized && !static_obstacle_primitives.empty()) {
4299 // Fast spatial grid lookup - O(1) instead of O(N)
4300 std::vector<uint> candidate_obstacles = obstacle_spatial_grid.getRelevantObstacles(query_position, MAX_STATIC_OBSTACLE_DISTANCE);
4301
4302
4303 // Still need distance check for precise filtering within grid cells
4304 for (uint static_prim: candidate_obstacles) {
4305 if (context->doesPrimitiveExist(static_prim)) {
4306 helios::vec3 min_corner, max_corner;
4307 context->getPrimitiveBoundingBox(static_prim, min_corner, max_corner);
4308 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4309 float distance = (query_position - prim_center).magnitude();
4310 if (distance < MAX_STATIC_OBSTACLE_DISTANCE) {
4311 relevant_geometry.push_back(static_prim);
4312 }
4313 }
4314 }
4315
4316 } else {
4317 // Fallback to linear search if spatial grid not available
4318 for (uint static_prim: static_obstacle_primitives) {
4319 if (context->doesPrimitiveExist(static_prim)) {
4320 helios::vec3 min_corner, max_corner;
4321 context->getPrimitiveBoundingBox(static_prim, min_corner, max_corner);
4322 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4323 float distance = (query_position - prim_center).magnitude();
4324 if (distance < MAX_STATIC_OBSTACLE_DISTANCE) {
4325 relevant_geometry.push_back(static_prim);
4326 }
4327 }
4328 }
4329 }
4330
4331 // Find which tree this query belongs to
4332 uint source_tree_id = 0;
4333 if (!query_primitives.empty()) {
4334 // Use the first query primitive to identify source tree
4335 auto it = object_to_tree_map.find(query_primitives[0]);
4336 if (it != object_to_tree_map.end()) {
4337 source_tree_id = it->second;
4338 }
4339 }
4340
4341 // If we couldn't identify the source tree, find the closest tree to query position
4342 if (source_tree_id == 0) {
4343 float min_distance = 1e30f;
4344 for (const auto &tree_pair: tree_bvh_map) {
4345 const TreeBVH &tree = tree_pair.second;
4346 float distance = (query_position - tree.tree_center).magnitude();
4347 if (distance < min_distance) {
4348 min_distance = distance;
4349 source_tree_id = tree.tree_object_id;
4350 }
4351 }
4352 }
4353
4354 // Add geometry from source tree and nearby trees within interaction distance
4355 for (const auto &tree_pair: tree_bvh_map) {
4356 const TreeBVH &tree = tree_pair.second;
4357 uint tree_id = tree_pair.first;
4358
4359 if (tree_id == source_tree_id) {
4360 // Always include source tree's own geometry
4361 for (uint prim_uuid: tree.primitive_indices) {
4362 if (context->doesPrimitiveExist(prim_uuid)) {
4363 relevant_geometry.push_back(prim_uuid);
4364 }
4365 }
4366 } else {
4367 // Check if other trees are within interaction distance
4368 float distance = (query_position - tree.tree_center).magnitude();
4369 float interaction_threshold = tree_isolation_distance + tree.tree_radius;
4370
4371 if (distance < interaction_threshold) {
4372 // Include nearby tree's geometry
4373 for (uint prim_uuid: tree.primitive_indices) {
4374 if (context->doesPrimitiveExist(prim_uuid)) {
4375 relevant_geometry.push_back(prim_uuid);
4376 }
4377 }
4378 }
4379 }
4380 }
4381
4382 return relevant_geometry;
4383}