1.3.64
 
Loading...
Searching...
No Matches
CollisionDetection.cpp
Go to the documentation of this file.
1
16#include "CollisionDetection.h"
17#include <atomic>
18#include <functional>
19#include <limits>
20#include <queue>
21
22#ifdef _OPENMP
23#include <omp.h>
24#endif
25
26// SIMD headers
27#ifdef __AVX2__
28#include <immintrin.h>
29#elif defined(__SSE4_1__)
30#include <smmintrin.h>
31#elif defined(__SSE2__)
32#include <emmintrin.h>
33#endif
34
35#ifdef HELIOS_CUDA_AVAILABLE
36#include <cuda_runtime.h>
37#endif
38
39using namespace helios;
40
41#ifdef HELIOS_CUDA_AVAILABLE
42// GPU BVH node structure (must match the one in .cu file)
43struct GPUBVHNode {
44 float3 aabb_min, aabb_max;
45 unsigned int left_child, right_child;
46 unsigned int primitive_start, primitive_count;
47 unsigned int is_leaf, padding;
48};
49
50// External CUDA functions
51extern "C" {
52void launchBVHTraversal(void *h_nodes, int node_count, unsigned int *h_primitive_indices, int primitive_count, float *h_primitive_aabb_min, float *h_primitive_aabb_max, float *h_query_aabb_min, float *h_query_aabb_max, int num_queries,
53 unsigned int *h_results, unsigned int *h_result_counts, int max_results_per_query);
54bool launchVoxelRayPathLengths(int num_rays, float *h_ray_origins, float *h_ray_directions, float grid_center_x, float grid_center_y, float grid_center_z, float grid_size_x, float grid_size_y, float grid_size_z, int grid_divisions_x,
55 int grid_divisions_y, int grid_divisions_z, int primitive_count, int *h_voxel_ray_counts, float *h_voxel_path_lengths, int *h_voxel_transmitted, int *h_voxel_hit_before, int *h_voxel_hit_after, int *h_voxel_hit_inside);
56// Warp-efficient GPU kernels
57void launchWarpEfficientBVH(void *h_bvh_soa_gpu, unsigned int *h_primitive_indices, int primitive_count, float *h_primitive_aabb_min, float *h_primitive_aabb_max, float *h_ray_origins, float *h_ray_directions, float *h_ray_max_distances,
58 int num_rays, unsigned int *h_results, unsigned int *h_result_counts, int max_results_per_ray);
59}
60
61// Helper function to convert helios::vec3 to float3
62inline float3 heliosVecToFloat3(const helios::vec3 &v) {
63 return make_float3(v.x, v.y, v.z);
64}
65#endif
66
68
69 if (a_context == nullptr) {
70 helios_runtime_error("ERROR (CollisionDetection::CollisionDetection): Context is null");
71 }
72
73 context = a_context;
74 printmessages = true;
75
76#ifdef HELIOS_CUDA_AVAILABLE
77 gpu_acceleration_enabled = true;
78#else
79 gpu_acceleration_enabled = false;
80#endif
81
82 // Initialize GPU memory pointers
83 d_bvh_nodes = nullptr;
84 d_primitive_indices = nullptr;
85 gpu_memory_allocated = false;
86
87 // Initialize BVH caching variables
88 bvh_dirty = true;
89 soa_dirty = true; // SoA needs initial build
90 automatic_bvh_rebuilds = true; // Default: allow automatic rebuilds
91
92 // Initialize hierarchical BVH variables
93 hierarchical_bvh_enabled = false;
94 static_bvh_valid = false;
95
96 // Initialize tree-based BVH variables
97 tree_based_bvh_enabled = false;
98 tree_isolation_distance = 5.0f; // Default 5 meter isolation distance
99 obstacle_spatial_grid_initialized = false;
100
101// Issue warning if OpenMP not available
102#ifndef _OPENMP
103 static bool openmp_warning_issued = false;
104 if (printmessages && !openmp_warning_issued) {
105 std::cout << "WARNING (CollisionDetection): OpenMP not available. Using serial CPU implementation. "
106 << "Performance will be significantly slower. Consider installing OpenMP for parallel execution." << std::endl;
107 openmp_warning_issued = true;
108 }
109#endif
110
111 // Initialize grid parameters
112 grid_center = make_vec3(0, 0, 0);
113 grid_size = make_vec3(1, 1, 1);
114 grid_divisions = helios::make_int3(1, 1, 1);
115
116 // Initialize voxel data parameters
117 voxel_data_initialized = false;
118 voxel_grid_center = make_vec3(0, 0, 0);
119 voxel_grid_size = make_vec3(1, 1, 1);
120 voxel_grid_divisions = helios::make_int3(1, 1, 1);
121 use_flat_arrays = false; // Default to false, enabled during initialization
122
123 // Initialize spatial optimization parameters
124 max_collision_distance = 10.0f; // Default 10 meter maximum distance
125}
126
128#ifdef HELIOS_CUDA_AVAILABLE
129 freeGPUMemory();
130#endif
131}
132
133std::vector<uint> CollisionDetection::findCollisions(uint UUID, bool allow_spatial_culling) {
134 return findCollisions(std::vector<uint>{UUID}, allow_spatial_culling);
135}
136
137std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &UUIDs, bool allow_spatial_culling) {
138
140 warnings.setEnabled(printmessages);
141
142 if (UUIDs.empty()) {
143 warnings.addWarning("no_uuids_provided", "No UUIDs provided");
144 warnings.report(std::cerr);
145 return {};
146 }
147
148 // Validate UUIDs - throw exception if any are invalid
149 std::vector<uint> valid_UUIDs;
150 for (uint uuid: UUIDs) {
151 if (context->doesPrimitiveExist(uuid)) {
152 valid_UUIDs.push_back(uuid);
153 } else {
154 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Invalid UUID " + std::to_string(uuid) + " provided");
155 }
156 }
157
158 // Automatically rebuild BVH if geometry has changed or BVH is empty
159 ensureBVHCurrent();
160
161 std::vector<uint> all_collisions;
162
163 for (uint UUID: valid_UUIDs) {
164
165 // Get bounding box for query primitive
166 if (!context->doesPrimitiveExist(UUID)) {
167 continue; // Skip invalid primitive
168 }
169 vec3 aabb_min, aabb_max;
170 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
171
172 std::vector<uint> collisions;
173
174#ifdef HELIOS_CUDA_AVAILABLE
175 if (gpu_acceleration_enabled && gpu_memory_allocated) {
176 collisions = traverseBVH_GPU(aabb_min, aabb_max);
177 } else {
178 collisions = traverseBVH_CPU(aabb_min, aabb_max);
179 }
180#else
181 collisions = traverseBVH_CPU(aabb_min, aabb_max);
182#endif
183
184 // Remove the query UUID from results
185 collisions.erase(std::remove(collisions.begin(), collisions.end(), UUID), collisions.end());
186
187 // Add to overall results
188 all_collisions.insert(all_collisions.end(), collisions.begin(), collisions.end());
189 }
190
191 // Remove duplicates
192 std::sort(all_collisions.begin(), all_collisions.end());
193 all_collisions.erase(std::unique(all_collisions.begin(), all_collisions.end()), all_collisions.end());
194
195 return all_collisions;
196}
197
198std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &primitive_UUIDs, const std::vector<uint> &object_IDs, bool allow_spatial_culling) {
199
201 warnings.setEnabled(printmessages);
202
203 if (primitive_UUIDs.empty() && object_IDs.empty()) {
204 warnings.addWarning("no_inputs_provided", "No UUIDs or object IDs provided");
205 warnings.report(std::cerr);
206 return {};
207 }
208
209 // Expand object IDs to their constituent primitive UUIDs
210 std::vector<uint> all_test_UUIDs = primitive_UUIDs;
211
212 for (uint ObjID: object_IDs) {
213 if (!context->doesObjectExist(ObjID)) {
214 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Object ID " + std::to_string(ObjID) + " does not exist");
215 }
216
217 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
218 all_test_UUIDs.insert(all_test_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
219 }
220
221 return findCollisions(all_test_UUIDs, allow_spatial_culling);
222}
223
224std::vector<uint> CollisionDetection::findCollisions(const std::vector<uint> &query_UUIDs, const std::vector<uint> &query_object_IDs, const std::vector<uint> &target_UUIDs, const std::vector<uint> &target_object_IDs, bool allow_spatial_culling) {
225
227 warnings.setEnabled(printmessages);
228
229 if (query_UUIDs.empty() && query_object_IDs.empty()) {
230 warnings.addWarning("no_query_inputs", "No query UUIDs or object IDs provided");
231 warnings.report(std::cerr);
232 return {};
233 }
234
235 // Expand query objects to their constituent primitive UUIDs
236 std::vector<uint> all_query_UUIDs = query_UUIDs;
237
238 for (uint ObjID: query_object_IDs) {
239 if (!context->doesObjectExist(ObjID)) {
240 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Query object ID " + std::to_string(ObjID) + " does not exist");
241 }
242
243 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
244 all_query_UUIDs.insert(all_query_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
245 }
246
247 // Validate query UUIDs
248 if (!validateUUIDs(all_query_UUIDs)) {
249 helios_runtime_error("ERROR (CollisionDetection::findCollisions): One or more invalid query UUIDs provided");
250 }
251
252 // Build restricted BVH if target geometry is specified
253
254 if (target_UUIDs.empty() && target_object_IDs.empty()) {
255 // Use all geometry in context as target
256 ensureBVHCurrent();
257 } else {
258 // Build restricted BVH with only target geometry
259 std::vector<uint> all_target_UUIDs = target_UUIDs;
260
261 for (uint ObjID: target_object_IDs) {
262 if (!context->doesObjectExist(ObjID)) {
263 helios_runtime_error("ERROR (CollisionDetection::findCollisions): Target object ID " + std::to_string(ObjID) + " does not exist");
264 }
265
266 std::vector<uint> object_UUIDs = context->getObjectPrimitiveUUIDs(ObjID);
267 all_target_UUIDs.insert(all_target_UUIDs.end(), object_UUIDs.begin(), object_UUIDs.end());
268 }
269
270 // OPTIMIZATION: Use per-tree BVH if enabled for better scaling
271 if (tree_based_bvh_enabled && allow_spatial_culling && !all_query_UUIDs.empty()) {
272 // Get tree-relevant geometry instead of using all targets
273 helios::vec3 query_center = helios::vec3(0, 0, 0);
274 if (!all_query_UUIDs.empty()) {
275 // Calculate approximate query center from first query primitive
276 helios::vec3 min_corner, max_corner;
277 context->getPrimitiveBoundingBox(all_query_UUIDs[0], min_corner, max_corner);
278 query_center = (min_corner + max_corner) * 0.5f;
279 }
280
281 // Use the configured tree isolation distance (collision cone height)
282 std::vector<uint> effective_targets = getRelevantGeometryForTree(query_center, all_query_UUIDs, tree_isolation_distance);
283
284 all_target_UUIDs = effective_targets;
285 }
286
287 // Validate target UUIDs
288 if (!all_target_UUIDs.empty() && !validateUUIDs(all_target_UUIDs)) {
289 helios_runtime_error("ERROR (CollisionDetection::findCollisions): One or more invalid target UUIDs provided");
290 }
291
292 // Build BVH with only the target geometry (with caching)
293 if (!all_target_UUIDs.empty()) {
294 updateBVH(all_target_UUIDs, false); // Use caching logic instead of direct rebuild
295 }
296 }
297
298 // Perform collision detection using the same logic as the standard findCollisions
299 std::vector<uint> all_collisions;
300
301 for (uint UUID: all_query_UUIDs) {
302
303 // Get bounding box for query primitive
304 if (!context->doesPrimitiveExist(UUID)) {
305 continue; // Skip invalid primitive
306 }
307 vec3 aabb_min, aabb_max;
308 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
309
310 std::vector<uint> collisions;
311
312#ifdef HELIOS_CUDA_AVAILABLE
313 if (gpu_acceleration_enabled && gpu_memory_allocated) {
314 collisions = traverseBVH_GPU(aabb_min, aabb_max);
315 } else {
316 collisions = traverseBVH_CPU(aabb_min, aabb_max);
317 }
318#else
319 collisions = traverseBVH_CPU(aabb_min, aabb_max);
320#endif
321
322 // Remove the query UUID from results
323 collisions.erase(std::remove(collisions.begin(), collisions.end(), UUID), collisions.end());
324
325 // Add to overall results
326 all_collisions.insert(all_collisions.end(), collisions.begin(), collisions.end());
327 }
328
329 // Remove duplicates
330 std::sort(all_collisions.begin(), all_collisions.end());
331 all_collisions.erase(std::unique(all_collisions.begin(), all_collisions.end()), all_collisions.end());
332
333 warnings.report(std::cerr);
334 return all_collisions;
335}
336
337void CollisionDetection::buildBVH(const std::vector<uint> &UUIDs) {
338
339 // Create warning aggregator
341 warnings.setEnabled(printmessages);
342
343 std::vector<uint> primitives_to_include;
344
345 if (UUIDs.empty()) {
346 // Include all primitives in context
347 primitives_to_include = context->getAllUUIDs();
348 } else {
349 primitives_to_include = UUIDs;
350 }
351
352
353 if (primitives_to_include.empty()) {
354 warnings.addWarning("no_primitives_for_bvh", "No primitives found to build BVH");
355 warnings.report(std::cerr);
356 return;
357 }
358
359 // Validate UUIDs - throw exception if any are invalid (only when specific UUIDs are provided)
360 std::vector<uint> valid_primitives;
361 if (!UUIDs.empty()) {
362 // When specific UUIDs are provided, they must all be valid
363 for (uint uuid: primitives_to_include) {
364 if (context->doesPrimitiveExist(uuid)) {
365 valid_primitives.push_back(uuid);
366 } else {
367 helios_runtime_error("ERROR (CollisionDetection::buildBVH): Invalid UUID " + std::to_string(uuid) + " provided");
368 }
369 }
370 } else {
371 // When no specific UUIDs provided (use all), filter out invalid ones
372 for (uint uuid: primitives_to_include) {
373 if (context->doesPrimitiveExist(uuid)) {
374 valid_primitives.push_back(uuid);
375 } else {
376 warnings.addWarning("invalid_uuid_skipped", "Skipping invalid UUID " + std::to_string(uuid));
377 }
378 }
379
380 if (valid_primitives.empty()) {
381 warnings.addWarning("no_valid_primitives_after_filtering", "No valid primitives found after filtering");
382 warnings.report(std::cerr);
383 return;
384 }
385 }
386
387 primitives_to_include = valid_primitives;
388
389 // Check if the primitive set has actually changed before clearing cache
390 std::set<uint> new_primitive_set(primitives_to_include.begin(), primitives_to_include.end());
391 std::set<uint> old_primitive_set(primitive_indices.begin(), primitive_indices.end());
392
393 bool primitive_set_changed = (new_primitive_set != old_primitive_set);
394
395 if (primitive_set_changed) {
396 // Clear primitive cache only when primitive set changes - CRITICAL for performance
397 primitive_cache.clear();
398 }
399
400 // Clear existing BVH
401 bvh_nodes.clear();
402 primitive_indices.clear();
403
404 // Copy primitives to indices array
405 primitive_indices = primitives_to_include;
406
407 // Pre-allocate BVH nodes to avoid excessive resizing
408 // For N primitives, we need at most 2*N-1 nodes for a complete binary tree
409 size_t max_nodes = std::max(size_t(1), 2 * primitives_to_include.size());
410 bvh_nodes.clear();
411 bvh_nodes.resize(max_nodes); // Pre-allocate ALL nodes at once to avoid any resizing
412 next_available_node_index = 1; // Track next available node (0 is root)
413
414 // OPTIMIZATION: Pre-cache bounding boxes with dirty flagging to avoid repeated expensive calculations
415 // Only clear cache for primitives that no longer exist or are dirty
416 std::unordered_set<uint> current_primitives(primitives_to_include.begin(), primitives_to_include.end());
417
418 // Remove cached entries for primitives that no longer exist
419 auto cache_it = primitive_aabbs_cache.begin();
420 while (cache_it != primitive_aabbs_cache.end()) {
421 if (current_primitives.find(cache_it->first) == current_primitives.end()) {
422 cache_it = primitive_aabbs_cache.erase(cache_it);
423 } else {
424 ++cache_it;
425 }
426 }
427
428 // Update only dirty or missing cache entries
429 for (uint UUID: primitives_to_include) {
430 if (!context->doesPrimitiveExist(UUID)) {
431 continue; // Skip invalid primitive
432 }
433
434 // Only update if not cached or marked as dirty
435 bool needs_update = (primitive_aabbs_cache.find(UUID) == primitive_aabbs_cache.end()) || (dirty_primitive_cache.find(UUID) != dirty_primitive_cache.end());
436
437 if (needs_update) {
438 vec3 aabb_min, aabb_max;
439 context->getPrimitiveBoundingBox(UUID, aabb_min, aabb_max);
440 primitive_aabbs_cache[UUID] = {aabb_min, aabb_max};
441 dirty_primitive_cache.erase(UUID); // Mark as clean
442 }
443 }
444
445 // Build BVH recursively starting from root
446 buildBVHRecursive(0, 0, primitive_indices.size(), 0);
447
448 // Resize to actual nodes used (much more efficient than shrink_to_fit)
449 bvh_nodes.resize(next_available_node_index);
450
451 // Clear any stale optimized structures since we just rebuilt the BVH
452 // They will be rebuilt on demand by ensureOptimizedBVH() when needed
453 bvh_nodes_soa.clear();
454 bvh_nodes_soa.node_count = 0;
455
456 // Transfer to GPU if acceleration is enabled
457#ifdef HELIOS_CUDA_AVAILABLE
458 if (gpu_acceleration_enabled) {
459 transferBVHToGPU();
460 }
461#endif
462
463 // Update internal tracking - record what we have processed
464 // This allows us to avoid redundant rebuilds until user marks geometry clean
465 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
466
467 // Track ALL primitives that are now in the BVH, not just dirty ones
468 // This ensures isBVHValid() works correctly when new geometry is added
469 last_processed_uuids.clear();
470 last_processed_uuids.insert(primitives_to_include.begin(), primitives_to_include.end());
471
472 last_processed_deleted_uuids.clear();
473 last_processed_deleted_uuids.insert(context_deleted_uuids.begin(), context_deleted_uuids.end());
474
475 // Update BVH geometry tracking for caching
476 last_bvh_geometry.clear();
477 last_bvh_geometry.insert(primitives_to_include.begin(), primitives_to_include.end());
478 bvh_dirty = false;
479 soa_dirty = true; // SoA needs rebuild after BVH change
480
481 // Report aggregated warnings
482 warnings.report(std::cerr);
483}
484
486 markBVHDirty();
487 buildBVH();
488}
489
491 automatic_bvh_rebuilds = false;
492}
493
495 automatic_bvh_rebuilds = true;
496}
497
499 hierarchical_bvh_enabled = true;
500 static_bvh_valid = false; // Force rebuild of static BVH
501}
502
504 hierarchical_bvh_enabled = false;
505 // Clear static BVH data
506 static_bvh_nodes.clear();
507 static_bvh_primitives.clear();
508 static_bvh_valid = false;
509 last_static_bvh_geometry.clear();
510}
511
512void CollisionDetection::updateHierarchicalBVH(const std::set<uint> &requested_geometry, bool force_rebuild) {
513
514 // Step 1: Build/update static BVH if needed
515 if (!static_bvh_valid || force_rebuild || static_geometry_cache != last_static_bvh_geometry) {
517 }
518
519 // Step 2: Separate dynamic geometry (not in static cache)
520 std::vector<uint> dynamic_geometry;
521 for (uint uuid: requested_geometry) {
522 if (static_geometry_cache.find(uuid) == static_geometry_cache.end()) {
523 dynamic_geometry.push_back(uuid);
524 }
525 }
526
527
528 // Step 3: Build dynamic BVH with remaining geometry (this is smaller and faster)
529 if (!dynamic_geometry.empty()) {
530 buildBVH(dynamic_geometry); // Use existing buildBVH for dynamic part
531 } else {
532 // No dynamic geometry - just clear the dynamic BVH
533 bvh_nodes.clear();
534 primitive_indices.clear();
535 }
536
537 // Update cache
538 last_bvh_geometry = requested_geometry;
539 bvh_dirty = false;
540 soa_dirty = true; // SoA needs rebuild after BVH change
541}
542
544 if (static_geometry_cache.empty()) {
545 static_bvh_nodes.clear();
546 static_bvh_primitives.clear();
547 static_bvh_valid = false;
548 return;
549 }
550
551 std::vector<uint> static_primitives(static_geometry_cache.begin(), static_geometry_cache.end());
552
553
554 // Build BVH for static geometry (reuse existing GPU BVH building logic)
555 // For now, we'll store it in the static BVH structures but use same building method
556 std::vector<BVHNode> temp_nodes;
557 std::vector<uint> temp_primitives;
558
559 // Swap in static storage for building
560 std::swap(bvh_nodes, temp_nodes);
561 std::swap(primitive_indices, temp_primitives);
562
563 // Build BVH using existing method
564 buildBVH(static_primitives);
565
566 // Store result in static BVH and restore dynamic BVH
567 static_bvh_nodes = bvh_nodes;
568 static_bvh_primitives = primitive_indices;
569 std::swap(bvh_nodes, temp_nodes);
570 std::swap(primitive_indices, temp_primitives);
571
572 static_bvh_valid = true;
573 last_static_bvh_geometry = static_geometry_cache;
574}
575
576void CollisionDetection::updateBVH(const std::vector<uint> &UUIDs, bool force_rebuild) {
577 // Convert input to set for efficient comparison
578 std::set<uint> requested_geometry(UUIDs.begin(), UUIDs.end());
579
580 // Check if geometry has changed significantly
581 bool geometry_changed = (requested_geometry != last_bvh_geometry) || bvh_dirty;
582
583 if (!geometry_changed && !force_rebuild) {
584 return;
585 }
586
587 // Use hierarchical BVH approach if enabled
588 if (hierarchical_bvh_enabled) {
589 updateHierarchicalBVH(requested_geometry, force_rebuild);
590 return;
591 }
592
593 // Determine if we need a full rebuild or can do incremental update
594 if (force_rebuild || bvh_nodes.empty()) {
595 // Full rebuild required
596 buildBVH(UUIDs);
597 } else {
598 // Check how much geometry has changed
599 std::set<uint> added_geometry, removed_geometry;
600
601 // Find added geometry (in requested but not in last_bvh_geometry)
602 std::set_difference(requested_geometry.begin(), requested_geometry.end(), last_bvh_geometry.begin(), last_bvh_geometry.end(), std::inserter(added_geometry, added_geometry.begin()));
603
604 // Find removed geometry (in last_bvh_geometry but not in requested)
605 std::set_difference(last_bvh_geometry.begin(), last_bvh_geometry.end(), requested_geometry.begin(), requested_geometry.end(), std::inserter(removed_geometry, removed_geometry.begin()));
606
607 // If more than 20% of geometry changed, do full rebuild, otherwise incremental
608 size_t total_change = added_geometry.size() + removed_geometry.size();
609 size_t current_size = std::max(last_bvh_geometry.size(), requested_geometry.size());
610
611 // For plant growth, we want to favor incremental updates since they're mostly additions
612 // Use a more aggressive threshold that considers the type of changes
613 bool mostly_additions = (removed_geometry.size() < added_geometry.size() * 0.1f); // <10% removals
614 float change_threshold = mostly_additions ? 0.5f : 0.2f; // Higher threshold for growth scenarios
615
616 if (current_size == 0 || (float(total_change) / float(current_size)) > change_threshold) {
617 buildBVH(UUIDs);
618 } else {
619 // Implement incremental update by selective insertion/removal
620 incrementalUpdateBVH(added_geometry, removed_geometry, requested_geometry);
621 }
622 }
623
624 // Update tracking
625 last_bvh_geometry = requested_geometry;
626 bvh_dirty = false;
627 soa_dirty = true; // SoA needs rebuild after BVH change
628}
629
630void CollisionDetection::setStaticGeometry(const std::vector<uint> &UUIDs) {
631 static_geometry_cache.clear();
632 static_geometry_cache.insert(UUIDs.begin(), UUIDs.end());
633}
634
635void CollisionDetection::ensureBVHCurrent() {
636 // If automatic rebuilds are disabled, skip all automatic updates
637 if (!automatic_bvh_rebuilds) {
638 return;
639 }
640
641 // If BVH is completely empty, build it with all geometry
642 if (bvh_nodes.empty()) {
643 if (printmessages) {
644 std::cout << "Building initial BVH..." << std::endl;
645 }
646 buildBVH();
647 return;
648 }
649
650 // Use two-level dirty tracking pattern (similar to Visualizer plugin)
651 // Get dirty UUIDs from Context (but don't clear them - that's for the user)
652 std::vector<uint> context_dirty_uuids = context->getDirtyUUIDs(false); // Don't include deleted
653 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
654
655 // Convert to sets for efficient comparison
656 std::set<uint> current_dirty(context_dirty_uuids.begin(), context_dirty_uuids.end());
657 std::set<uint> current_deleted(context_deleted_uuids.begin(), context_deleted_uuids.end());
658
659 // Check if there are new changes since our last processing
660 bool has_new_dirty = false;
661 bool has_new_deleted = false;
662
663 // Check for new dirty UUIDs we haven't processed
664 for (uint uuid: current_dirty) {
665 if (last_processed_uuids.find(uuid) == last_processed_uuids.end()) {
666 has_new_dirty = true;
667 break;
668 }
669 }
670
671 // Check for new deleted UUIDs we haven't processed
672 for (uint uuid: current_deleted) {
673 if (last_processed_deleted_uuids.find(uuid) == last_processed_deleted_uuids.end()) {
674 has_new_deleted = true;
675 break;
676 }
677 }
678
679 // Only rebuild if there are genuinely new changes since our last processing
680 if (has_new_dirty || has_new_deleted) {
681 if (printmessages) {
682 std::cout << "Geometry has changed since last BVH build, rebuilding..." << std::endl;
683 }
684
685 buildBVH(); // This will update our internal tracking
686 }
687
688 // Note: We do NOT call context->markGeometryClean() here
689 // That should only be done by the user after all plugins have processed the changes
690}
691
693 if (bvh_nodes.empty()) {
694 return false;
695 }
696
697 // Check if there are new primitives in the context that aren't in our BVH
698 std::vector<uint> all_context_uuids = context->getAllUUIDs();
699 for (uint uuid: all_context_uuids) {
700 if (last_processed_uuids.find(uuid) == last_processed_uuids.end()) {
701 return false; // Found a primitive that's not in our BVH
702 }
703 }
704
705 // Check for deleted UUIDs we haven't processed
706 std::vector<uint> context_deleted_uuids = context->getDeletedUUIDs();
707 for (uint uuid: context_deleted_uuids) {
708 if (last_processed_deleted_uuids.find(uuid) == last_processed_deleted_uuids.end()) {
709 return false; // Found new deleted UUID we haven't processed
710 }
711 }
712
713 // Check if any primitives in our BVH have been deleted
714 for (uint uuid: primitive_indices) {
715 if (!context->doesPrimitiveExist(uuid)) {
716 return false; // A primitive in our BVH no longer exists
717 }
718 }
719
720 return true; // BVH is current with respect to geometry changes
721}
722
724#ifdef HELIOS_CUDA_AVAILABLE
725 gpu_acceleration_enabled = true;
726 if (!bvh_nodes.empty()) {
727 transferBVHToGPU();
728 }
729#else
730 if (printmessages) {
731 std::cerr << "WARNING: GPU acceleration requested but CUDA not available. Ignoring request." << std::endl;
732 }
733#endif
734}
735
737 gpu_acceleration_enabled = false;
738#ifdef HELIOS_CUDA_AVAILABLE
739 freeGPUMemory();
740#endif
741}
742
744 return gpu_acceleration_enabled;
745}
746
748 printmessages = false;
749}
750
752 printmessages = true;
753}
754
756 return primitive_indices.size();
757}
758
759void CollisionDetection::getBVHStatistics(size_t &node_count, size_t &leaf_count, size_t &max_depth) const {
760
761 node_count = bvh_nodes.size();
762 leaf_count = 0;
763 max_depth = 0;
764
765 // Simple traversal to count leaves and find max depth
766 std::function<void(uint, size_t)> traverse = [&](uint node_idx, size_t depth) {
767 if (node_idx >= bvh_nodes.size())
768 return;
769
770 const BVHNode &node = bvh_nodes[node_idx];
771 max_depth = std::max(max_depth, depth);
772
773 if (node.is_leaf) {
774 leaf_count++;
775 } else {
776 if (node.left_child != 0xFFFFFFFF) {
777 traverse(node.left_child, depth + 1);
778 }
779 if (node.right_child != 0xFFFFFFFF) {
780 traverse(node.right_child, depth + 1);
781 }
782 }
783 };
784
785 if (!bvh_nodes.empty()) {
786 traverse(0, 0);
787 }
788}
789
790void CollisionDetection::calculateAABB(const std::vector<uint> &primitives, vec3 &aabb_min, vec3 &aabb_max) const {
791
792 if (primitives.empty()) {
793 aabb_min = make_vec3(0, 0, 0);
794 aabb_max = make_vec3(0, 0, 0);
795 return;
796 }
797
798 // Find first valid primitive for initialization
799 size_t first_valid = 0;
800 while (first_valid < primitives.size() && !context->doesPrimitiveExist(primitives[first_valid])) {
801 first_valid++;
802 }
803 if (first_valid >= primitives.size()) {
804 // No valid primitives found
805 aabb_min = make_vec3(0, 0, 0);
806 aabb_max = make_vec3(0, 0, 0);
807 return;
808 }
809
810 // Initialize with first valid primitive's bounding box
811 context->getPrimitiveBoundingBox(primitives[first_valid], aabb_min, aabb_max);
812
813 // Expand to include all remaining valid primitives
814 for (size_t i = first_valid + 1; i < primitives.size(); i++) {
815 if (!context->doesPrimitiveExist(primitives[i])) {
816 continue; // Skip invalid primitive
817 }
818 vec3 prim_min, prim_max;
819 context->getPrimitiveBoundingBox(primitives[i], prim_min, prim_max);
820
821 aabb_min.x = std::min(aabb_min.x, prim_min.x);
822 aabb_min.y = std::min(aabb_min.y, prim_min.y);
823 aabb_min.z = std::min(aabb_min.z, prim_min.z);
824
825 aabb_max.x = std::max(aabb_max.x, prim_max.x);
826 aabb_max.y = std::max(aabb_max.y, prim_max.y);
827 aabb_max.z = std::max(aabb_max.z, prim_max.z);
828 }
829}
830
831void CollisionDetection::buildBVHRecursive(uint node_index, size_t primitive_start, size_t primitive_count, int depth) {
832
833 // Node should already be pre-allocated
834 if (node_index >= bvh_nodes.size()) {
835 throw std::runtime_error("CollisionDetection: BVH recursive access exceeded pre-allocated capacity");
836 }
837
838 // Bounds check for primitive_indices access
839 if (primitive_start + primitive_count > primitive_indices.size()) {
840 throw std::runtime_error("CollisionDetection: BVH primitive bounds check failed - primitive_start(" + std::to_string(primitive_start) + ") + primitive_count(" + std::to_string(primitive_count) + ") > primitive_indices.size(" +
841 std::to_string(primitive_indices.size()) + ")");
842 }
843
844 BVHNode &node = bvh_nodes[node_index];
845
846 // Calculate bounding box for this node using cached AABBs
847 if (primitive_count == 0) {
848 node.aabb_min = make_vec3(0, 0, 0);
849 node.aabb_max = make_vec3(0, 0, 0);
850 } else {
851 // Initialize with first primitive's cached AABB
852 uint first_uuid = primitive_indices[primitive_start];
853 auto it = primitive_aabbs_cache.find(first_uuid);
854 if (it == primitive_aabbs_cache.end()) {
855 // Handle missing primitive - use zero bounds
856 node.aabb_min = make_vec3(0, 0, 0);
857 node.aabb_max = make_vec3(0, 0, 0);
858 return;
859 }
860 const auto &first_cached_aabb = it->second;
861 node.aabb_min = first_cached_aabb.first;
862 node.aabb_max = first_cached_aabb.second;
863
864 // Expand to include all primitives in this node
865 for (size_t i = 1; i < primitive_count; i++) {
866 uint uuid = primitive_indices[primitive_start + i];
867 auto it = primitive_aabbs_cache.find(uuid);
868 if (it == primitive_aabbs_cache.end()) {
869 continue; // Skip missing primitive
870 }
871 const auto &cached_aabb = it->second;
872 node.aabb_min.x = std::min(node.aabb_min.x, cached_aabb.first.x);
873 node.aabb_min.y = std::min(node.aabb_min.y, cached_aabb.first.y);
874 node.aabb_min.z = std::min(node.aabb_min.z, cached_aabb.first.z);
875
876 node.aabb_max.x = std::max(node.aabb_max.x, cached_aabb.second.x);
877 node.aabb_max.y = std::max(node.aabb_max.y, cached_aabb.second.y);
878 node.aabb_max.z = std::max(node.aabb_max.z, cached_aabb.second.z);
879 }
880 }
881
882 // Stopping criteria - make this a leaf
883 // TEMPORARY: Very aggressive stopping for large scenes to prevent timeout
884 const int MAX_PRIMITIVES_PER_LEAF = (primitive_indices.size() > 500000) ? 500 : 100; // Much larger leaves for big scenes
885 const int MAX_DEPTH = (primitive_indices.size() > 500000) ? 6 : 10; // Shallower trees for big scenes
886
887 if (primitive_count <= MAX_PRIMITIVES_PER_LEAF || depth >= MAX_DEPTH) {
888 // Make leaf node
889 node.is_leaf = true;
890 node.primitive_start = primitive_start;
891 node.primitive_count = primitive_count;
892 node.left_child = 0xFFFFFFFF;
893 node.right_child = 0xFFFFFFFF;
894 return;
895 }
896
897 // Find best split using Surface Area Heuristic (simplified)
898 vec3 extent = node.aabb_max - node.aabb_min;
899 int split_axis = 0; // Split along longest axis
900 if (extent.y > extent.x)
901 split_axis = 1;
902 if (extent.z > (split_axis == 0 ? extent.x : extent.y))
903 split_axis = 2;
904
905 // In-place sort using pre-cached centroids to avoid temporary vector allocations
906 // This is critical for memory efficiency during recursive BVH construction
907 std::sort(primitive_indices.begin() + primitive_start, primitive_indices.begin() + primitive_start + primitive_count, [&](uint a, uint b) {
908 // Use cache lookup with bounds checking for safety
909 auto it_a = primitive_aabbs_cache.find(a);
910 auto it_b = primitive_aabbs_cache.find(b);
911 if (it_a == primitive_aabbs_cache.end() || it_b == primitive_aabbs_cache.end()) {
912 return a < b; // Fallback to UUID ordering for missing primitives
913 }
914
915 const auto &aabb_a = it_a->second;
916 const auto &aabb_b = it_b->second;
917
918 // Compute centroids inline to avoid vec3 temporaries
919 float centroid_a_coord, centroid_b_coord;
920 if (split_axis == 0) {
921 centroid_a_coord = 0.5f * (aabb_a.first.x + aabb_a.second.x);
922 centroid_b_coord = 0.5f * (aabb_b.first.x + aabb_b.second.x);
923 } else if (split_axis == 1) {
924 centroid_a_coord = 0.5f * (aabb_a.first.y + aabb_a.second.y);
925 centroid_b_coord = 0.5f * (aabb_b.first.y + aabb_b.second.y);
926 } else {
927 centroid_a_coord = 0.5f * (aabb_a.first.z + aabb_a.second.z);
928 centroid_b_coord = 0.5f * (aabb_b.first.z + aabb_b.second.z);
929 }
930
931 // Add stable tiebreaker using UUID to ensure deterministic BVH construction
932 // This prevents non-deterministic behavior when primitives have equal centroids
933 if (centroid_a_coord == centroid_b_coord) {
934 return a < b;
935 }
936 return centroid_a_coord < centroid_b_coord;
937 });
938
939 // Split in middle
940 size_t split_index = primitive_count / 2;
941
942 // Allocate child nodes from pre-allocated array (no resizing needed)
943 uint left_child_index = next_available_node_index++;
944 uint right_child_index = next_available_node_index++;
945
946 // Ensure we don't exceed pre-allocated capacity
947 if (right_child_index >= bvh_nodes.size()) {
948 throw std::runtime_error("CollisionDetection: BVH node allocation exceeded pre-calculated capacity");
949 }
950
951 // Re-get the node reference after potential reallocation
952 BVHNode &updated_node = bvh_nodes[node_index];
953 updated_node.left_child = left_child_index;
954 updated_node.right_child = right_child_index;
955 updated_node.is_leaf = false;
956 updated_node.primitive_start = 0;
957 updated_node.primitive_count = 0;
958
959 // Recursively build child nodes
960 buildBVHRecursive(left_child_index, primitive_start, split_index, depth + 1);
961 buildBVHRecursive(right_child_index, primitive_start + split_index, primitive_count - split_index, depth + 1);
962}
963
964std::vector<uint> CollisionDetection::traverseBVH_CPU(const vec3 &query_aabb_min, const vec3 &query_aabb_max) {
965
966 std::vector<uint> results;
967
968 if (bvh_nodes.empty()) {
969 return results;
970 }
971
972 // Stack-based traversal to avoid recursion
973 std::vector<uint> node_stack;
974 node_stack.push_back(0); // Start with root node
975
976 while (!node_stack.empty()) {
977 uint node_idx = node_stack.back();
978 node_stack.pop_back();
979
980 if (node_idx >= bvh_nodes.size())
981 continue;
982
983 const BVHNode &node = bvh_nodes[node_idx];
984
985 // Test if query AABB intersects node AABB
986 if (!aabbIntersect(query_aabb_min, query_aabb_max, node.aabb_min, node.aabb_max)) {
987 continue;
988 }
989
990 if (node.is_leaf) {
991 // Check each primitive in this leaf for intersection
992 for (uint i = 0; i < node.primitive_count; i++) {
993 uint primitive_id = primitive_indices[node.primitive_start + i];
994
995 // Get this primitive's AABB
996 if (!context->doesPrimitiveExist(primitive_id)) {
997 continue; // Skip invalid primitive
998 }
999 vec3 prim_min, prim_max;
1000 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
1001
1002 // Only add to results if AABBs actually intersect
1003 if (aabbIntersect(query_aabb_min, query_aabb_max, prim_min, prim_max)) {
1004 results.push_back(primitive_id);
1005 }
1006 }
1007 } else {
1008 // Add child nodes to stack for processing
1009 if (node.left_child != 0xFFFFFFFF) {
1010 node_stack.push_back(node.left_child);
1011 }
1012 if (node.right_child != 0xFFFFFFFF) {
1013 node_stack.push_back(node.right_child);
1014 }
1015 }
1016 }
1017
1018 return results;
1019}
1020
1021#ifdef HELIOS_CUDA_AVAILABLE
1022std::vector<uint> CollisionDetection::traverseBVH_GPU(const vec3 &query_aabb_min, const vec3 &query_aabb_max) {
1023 if (!gpu_memory_allocated) {
1024 helios_runtime_error("ERROR: GPU traversal requested but GPU memory is not allocated. Call buildBVH() or transferBVHToGPU() first.");
1025 }
1026
1027 // Prepare single query
1028 float query_min_array[3] = {query_aabb_min.x, query_aabb_min.y, query_aabb_min.z};
1029 float query_max_array[3] = {query_aabb_max.x, query_aabb_max.y, query_aabb_max.z};
1030
1031 // Prepare primitive AABB arrays
1032 std::vector<float> primitive_min_array(primitive_indices.size() * 3);
1033 std::vector<float> primitive_max_array(primitive_indices.size() * 3);
1034
1035 for (size_t i = 0; i < primitive_indices.size(); i++) {
1036 uint uuid = primitive_indices[i];
1037 auto it = primitive_aabbs_cache.find(uuid);
1038 if (it == primitive_aabbs_cache.end()) {
1039 continue; // Skip missing primitive
1040 }
1041 const auto &cached_aabb = it->second;
1042
1043 primitive_min_array[i * 3] = cached_aabb.first.x;
1044 primitive_min_array[i * 3 + 1] = cached_aabb.first.y;
1045 primitive_min_array[i * 3 + 2] = cached_aabb.first.z;
1046
1047 primitive_max_array[i * 3] = cached_aabb.second.x;
1048 primitive_max_array[i * 3 + 1] = cached_aabb.second.y;
1049 primitive_max_array[i * 3 + 2] = cached_aabb.second.z;
1050 }
1051
1052 const int max_results = 1000; // Reasonable limit
1053 std::vector<unsigned int> results(max_results);
1054 unsigned int result_count = 0;
1055
1056 // Call CUDA kernel wrapper
1057 launchBVHTraversal(d_bvh_nodes, bvh_nodes.size(), d_primitive_indices, primitive_indices.size(), primitive_min_array.data(), primitive_max_array.data(), query_min_array, query_max_array, 1, results.data(), &result_count, max_results);
1058
1059 // Convert to return format
1060 std::vector<uint> final_results;
1061 for (unsigned int i = 0; i < result_count; i++) {
1062 final_results.push_back(results[i]);
1063 }
1064
1065 return final_results;
1066}
1067#endif
1068
1069bool CollisionDetection::aabbIntersect(const vec3 &min1, const vec3 &max1, const vec3 &min2, const vec3 &max2) {
1070 return (min1.x <= max2.x && max1.x >= min2.x) && (min1.y <= max2.y && max1.y >= min2.y) && (min1.z <= max2.z && max1.z >= min2.z);
1071}
1072
1073bool CollisionDetection::rayAABBIntersect(const vec3 &origin, const vec3 &direction, const vec3 &aabb_min, const vec3 &aabb_max, float &t_min, float &t_max) const {
1074
1075 t_min = 0.0f;
1076 t_max = std::numeric_limits<float>::max();
1077
1078 // Check intersection with each pair of parallel planes (X, Y, Z)
1079 for (int i = 0; i < 3; i++) {
1080 float dir_component = (i == 0) ? direction.x : (i == 1) ? direction.y : direction.z;
1081 float orig_component = (i == 0) ? origin.x : (i == 1) ? origin.y : origin.z;
1082 float min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
1083 float max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
1084
1085 if (std::abs(dir_component) < 1e-9f) {
1086 // Ray is parallel to the slab
1087 if (orig_component < min_component || orig_component > max_component) {
1088 return false; // Ray is outside the slab and parallel to it
1089 }
1090 } else {
1091 // Compute intersection t values with the two planes
1092 float t1 = (min_component - orig_component) / dir_component;
1093 float t2 = (max_component - orig_component) / dir_component;
1094
1095 // Make sure t1 is the near intersection and t2 is the far intersection
1096 if (t1 > t2) {
1097 std::swap(t1, t2);
1098 }
1099
1100 // Update the overall near and far intersection parameters
1101 t_min = std::max(t_min, t1);
1102 t_max = std::min(t_max, t2);
1103
1104 // If the near intersection is farther than the far intersection, no intersection
1105 if (t_min > t_max) {
1106 return false;
1107 }
1108 }
1109 }
1110
1111 // Ray intersects AABB if t_min <= t_max and the intersection is in front of ray origin
1112 return t_max >= 0.0f;
1113}
1114
1115bool CollisionDetection::coneAABBIntersect(const Cone &cone, const vec3 &aabb_min, const vec3 &aabb_max) {
1116 // Check if apex is inside AABB
1117 if (cone.apex.x >= aabb_min.x && cone.apex.x <= aabb_max.x && cone.apex.y >= aabb_min.y && cone.apex.y <= aabb_max.y && cone.apex.z >= aabb_min.z && cone.apex.z <= aabb_max.z) {
1118 return true; // Apex is inside AABB, definite intersection
1119 }
1120
1121 // Early rejection using bounding sphere around AABB
1122 vec3 box_center = 0.5f * (aabb_min + aabb_max);
1123 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1124 float box_radius = box_half_extents.magnitude();
1125
1126 // For infinite cone, check if any AABB corner is inside the cone
1127 if (cone.height <= 0.0f) {
1128 // Check all 8 corners of the AABB
1129 for (int i = 0; i < 8; i++) {
1130 vec3 corner = make_vec3((i & 1) ? aabb_max.x : aabb_min.x, (i & 2) ? aabb_max.y : aabb_min.y, (i & 4) ? aabb_max.z : aabb_min.z);
1131
1132 // Vector from apex to corner
1133 vec3 apex_to_corner = corner - cone.apex;
1134 float distance_along_axis = apex_to_corner * cone.axis;
1135
1136 // Point must be in front of apex
1137 if (distance_along_axis > 0) {
1138 // Check if point is within cone angle
1139 float cos_angle = distance_along_axis / apex_to_corner.magnitude();
1140 if (cos_angle >= cosf(cone.half_angle)) {
1141 return true; // This corner is inside the cone
1142 }
1143 }
1144 }
1145
1146 // Check if cone axis intersects AABB
1147 // Use ray-AABB intersection with cone axis as ray
1148 float t_min = 0.0f;
1149 float t_max = std::numeric_limits<float>::max();
1150
1151 for (int i = 0; i < 3; i++) {
1152 float axis_component = (i == 0) ? cone.axis.x : (i == 1) ? cone.axis.y : cone.axis.z;
1153 float apex_component = (i == 0) ? cone.apex.x : (i == 1) ? cone.apex.y : cone.apex.z;
1154 float min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
1155 float max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
1156
1157 if (std::abs(axis_component) < 1e-6f) {
1158 // Ray is parallel to slab
1159 if (apex_component < min_component || apex_component > max_component) {
1160 return false; // Ray is outside slab
1161 }
1162 } else {
1163 // Compute intersection t values
1164 float t1 = (min_component - apex_component) / axis_component;
1165 float t2 = (max_component - apex_component) / axis_component;
1166
1167 if (t1 > t2)
1168 std::swap(t1, t2);
1169
1170 t_min = std::max(t_min, t1);
1171 t_max = std::min(t_max, t2);
1172
1173 if (t_min > t_max) {
1174 return false; // No intersection
1175 }
1176 }
1177 }
1178
1179 // If we get here, the cone axis intersects the AABB
1180 // But for narrow cones, we need to check if the AABB is within the cone angle
1181 if (t_min >= 0 && t_max >= 0) {
1182 // Check if the intersection region on the axis is within the cone angle
1183 // Find the closest point on the axis within the intersection region
1184 float t_check = std::max(0.0f, t_min);
1185 vec3 axis_point = cone.apex + cone.axis * t_check;
1186
1187 // Find the closest point in the AABB to this axis point
1188 vec3 closest_in_box = make_vec3(std::max(aabb_min.x, std::min(axis_point.x, aabb_max.x)), std::max(aabb_min.y, std::min(axis_point.y, aabb_max.y)), std::max(aabb_min.z, std::min(axis_point.z, aabb_max.z)));
1189
1190 // Check if this closest point is within the cone
1191 vec3 apex_to_point = closest_in_box - cone.apex;
1192 float distance_along_axis = apex_to_point * cone.axis;
1193
1194 if (distance_along_axis > 0) {
1195 float distance_to_point = apex_to_point.magnitude();
1196 if (distance_to_point > 0) {
1197 float cos_angle = distance_along_axis / distance_to_point;
1198 if (cos_angle >= cosf(cone.half_angle)) {
1199 return true;
1200 }
1201 }
1202 }
1203 }
1204 } else {
1205 // Finite cone case
1206 // Check if any AABB corner is inside the finite cone
1207 for (int i = 0; i < 8; i++) {
1208 vec3 corner = make_vec3((i & 1) ? aabb_max.x : aabb_min.x, (i & 2) ? aabb_max.y : aabb_min.y, (i & 4) ? aabb_max.z : aabb_min.z);
1209
1210 // Vector from apex to corner
1211 vec3 apex_to_corner = corner - cone.apex;
1212 float distance_along_axis = apex_to_corner * cone.axis;
1213
1214 // Check if point is within cone height and in front of apex
1215 if (distance_along_axis > 0 && distance_along_axis <= cone.height) {
1216 // Check if point is within cone angle
1217 float cos_angle = distance_along_axis / apex_to_corner.magnitude();
1218 if (cos_angle >= cosf(cone.half_angle)) {
1219 return true; // This corner is inside the cone
1220 }
1221 }
1222 }
1223
1224 // Also need to check if cone base intersects AABB
1225 vec3 base_center = cone.apex + cone.axis * cone.height;
1226 float base_radius = cone.height * tanf(cone.half_angle);
1227
1228 // Simple sphere-AABB check for cone base
1229 vec3 closest_point = make_vec3(std::max(aabb_min.x, std::min(base_center.x, aabb_max.x)), std::max(aabb_min.y, std::min(base_center.y, aabb_max.y)), std::max(aabb_min.z, std::min(base_center.z, aabb_max.z)));
1230
1231 float dist_sq = (closest_point - base_center).magnitude();
1232 if (dist_sq <= base_radius) {
1233 return true; // Base circle intersects AABB
1234 }
1235 }
1236
1237 // More sophisticated tests could be added here for edge cases
1238 // For now, return false if no intersection found
1239 return false;
1240}
1241
1242bool CollisionDetection::coneAABBIntersectFast(const Cone &cone, const vec3 &aabb_min, const vec3 &aabb_max) {
1243 // Fast-path cone-AABB intersection optimized for high-throughput filtering
1244 // Uses aggressive early rejection to eliminate 99%+ of geometry with minimal computation
1245
1246 // Fast rejection test 1: Check if apex is inside AABB (very common case, worth checking first)
1247 if (cone.apex.x >= aabb_min.x && cone.apex.x <= aabb_max.x && cone.apex.y >= aabb_min.y && cone.apex.y <= aabb_max.y && cone.apex.z >= aabb_min.z && cone.apex.z <= aabb_max.z) {
1248 return true; // Apex inside AABB - definite intersection
1249 }
1250
1251 // Fast rejection test 2: Behind-apex test (eliminates geometry behind cone)
1252 vec3 box_center = 0.5f * (aabb_min + aabb_max);
1253 vec3 apex_to_center = box_center - cone.apex;
1254 float distance_along_axis = apex_to_center * cone.axis;
1255
1256 if (distance_along_axis <= 0.0f) {
1257 return false; // AABB is completely behind cone apex
1258 }
1259
1260 // Fast rejection test 3: Height test for finite cones
1261 if (cone.height > 0.0f && distance_along_axis > cone.height) {
1262 // Box center is beyond cone height, but we need to check if any part of box is within height
1263 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1264 float box_radius = box_half_extents.magnitude();
1265 if (distance_along_axis - box_radius > cone.height) {
1266 return false; // Entire AABB is beyond cone height
1267 }
1268 }
1269
1270 // Fast rejection test 4: Cone angle bounding sphere test (fast approximation)
1271 float max_distance = (cone.height > 0.0f) ? cone.height : distance_along_axis;
1272 float max_radius_at_distance = max_distance * tanf(cone.half_angle);
1273
1274 // Find closest point on cone axis to box center
1275 vec3 axis_point = cone.apex + cone.axis * distance_along_axis;
1276 float distance_from_axis = (box_center - axis_point).magnitude();
1277
1278 // Conservative bounding sphere test
1279 vec3 box_half_extents = 0.5f * (aabb_max - aabb_min);
1280 float box_radius = box_half_extents.magnitude();
1281
1282 if (distance_from_axis > max_radius_at_distance + box_radius) {
1283 return false; // AABB is completely outside cone's maximum radius
1284 }
1285
1286 // If we get here, AABB might intersect cone - fall back to precise test
1287 // This should only happen for a small percentage of AABBs
1288 return coneAABBIntersect(cone, aabb_min, aabb_max);
1289}
1290
1291// -------- RASTERIZATION-BASED COLLISION DETECTION IMPLEMENTATION --------
1292
1293int CollisionDetection::calculateOptimalBinCount(float cone_half_angle, int geometry_count) {
1294 // Target: ~1 degree angular resolution, scaled by cone size
1295 float base_resolution = M_PI / 180.0f; // 1 degree in radians
1296 float cone_solid_angle = 2.0f * M_PI * (1.0f - cosf(cone_half_angle));
1297 int optimal_bins = (int) (cone_solid_angle / (base_resolution * base_resolution));
1298
1299 // Clamp based on geometry complexity and performance
1300 int min_bins = 64; // Always enough resolution for gap detection
1301 int max_bins = std::min(1024, geometry_count * 4); // Don't exceed geometry complexity
1302
1303 return std::clamp(optimal_bins, min_bins, max_bins);
1304}
1305
1306std::vector<uint> CollisionDetection::filterPrimitivesParallel(const Cone &cone, const std::vector<uint> &primitive_uuids) {
1307 std::vector<uint> filtered_uuids;
1308
1309 if (primitive_uuids.empty()) {
1310 return filtered_uuids;
1311 }
1312
1313 // Reserve space for result vector
1314 filtered_uuids.reserve(primitive_uuids.size() / 10); // Estimate ~10% will pass filter
1315
1316#ifdef _OPENMP
1317 // Use thread-local vectors to avoid synchronization overhead
1318 const int num_threads = omp_get_max_threads();
1319 std::vector<std::vector<uint>> thread_results(num_threads);
1320
1321 // Pre-allocate thread-local storage
1322 for (int i = 0; i < num_threads; i++) {
1323 thread_results[i].reserve(primitive_uuids.size() / (num_threads * 10));
1324 }
1325
1326// Always parallelize AABB filtering (high value, low overhead)
1327#pragma omp parallel
1328 {
1329 int thread_id = omp_get_thread_num();
1330 std::vector<uint> &local_results = thread_results[thread_id];
1331
1332#pragma omp for nowait
1333 for (int i = 0; i < static_cast<int>(primitive_uuids.size()); i++) {
1334 uint uuid = primitive_uuids[i];
1335
1336 // Get primitive vertices and calculate AABB
1337 if (context->doesPrimitiveExist(uuid)) {
1338 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1339 if (!vertices.empty()) {
1340 // Calculate AABB from vertices
1341 vec3 aabb_min = vertices[0];
1342 vec3 aabb_max = vertices[0];
1343 for (const vec3 &vertex: vertices) {
1344 aabb_min = make_vec3(std::min(aabb_min.x, vertex.x), std::min(aabb_min.y, vertex.y), std::min(aabb_min.z, vertex.z));
1345 aabb_max = make_vec3(std::max(aabb_max.x, vertex.x), std::max(aabb_max.y, vertex.y), std::max(aabb_max.z, vertex.z));
1346 }
1347
1348 // Use fast cone-AABB intersection test
1349 if (coneAABBIntersectFast(cone, aabb_min, aabb_max)) {
1350 local_results.push_back(uuid);
1351 }
1352 }
1353 }
1354 }
1355 }
1356
1357 // Merge results from all threads
1358 size_t total_count = 0;
1359 for (const auto &thread_result: thread_results) {
1360 total_count += thread_result.size();
1361 }
1362
1363 filtered_uuids.reserve(total_count);
1364 for (const auto &thread_result: thread_results) {
1365 filtered_uuids.insert(filtered_uuids.end(), thread_result.begin(), thread_result.end());
1366 }
1367#else
1368 // Serial fallback when OpenMP is not available
1369 for (size_t i = 0; i < primitive_uuids.size(); i++) {
1370 uint uuid = primitive_uuids[i];
1371
1372 // Get primitive vertices and calculate AABB
1373 if (context->doesPrimitiveExist(uuid)) {
1374 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1375 if (!vertices.empty()) {
1376 // Calculate AABB from vertices
1377 vec3 aabb_min = vertices[0];
1378 vec3 aabb_max = vertices[0];
1379 for (const vec3 &vertex: vertices) {
1380 aabb_min = make_vec3(std::min(aabb_min.x, vertex.x), std::min(aabb_min.y, vertex.y), std::min(aabb_min.z, vertex.z));
1381 aabb_max = make_vec3(std::max(aabb_max.x, vertex.x), std::max(aabb_max.y, vertex.y), std::max(aabb_max.z, vertex.z));
1382 }
1383
1384 // Use fast cone-AABB intersection test
1385 if (coneAABBIntersectFast(cone, aabb_min, aabb_max)) {
1386 filtered_uuids.push_back(uuid);
1387 }
1388 }
1389 }
1390 }
1391#endif
1392
1393 return filtered_uuids;
1394}
1395
1396float CollisionDetection::cartesianToSphericalCone(const vec3 &vector, const vec3 &cone_axis, float &theta, float &phi) {
1397 float distance = vector.magnitude();
1398 if (distance < 1e-6f) {
1399 theta = 0.0f;
1400 phi = 0.0f;
1401 return 0.0f;
1402 }
1403
1404 vec3 normalized_vector = vector / distance;
1405
1406 // Calculate phi (polar angle from cone axis)
1407 float cos_phi = normalized_vector * cone_axis;
1408 phi = acosf(std::clamp(cos_phi, -1.0f, 1.0f));
1409
1410 // Calculate theta (azimuthal angle around cone axis)
1411 // Create orthonormal basis from cone axis
1412 vec3 up = (abs(cone_axis.z) < 0.999f) ? make_vec3(0, 0, 1) : make_vec3(1, 0, 0);
1413 vec3 right = cross(cone_axis, up);
1414 right.normalize();
1415 vec3 forward = cross(right, cone_axis);
1416
1417 // Project vector onto perpendicular plane
1418 vec3 projected = normalized_vector - cone_axis * cos_phi;
1419 if (projected.magnitude() > 1e-6f) {
1420 projected.normalize();
1421 float cos_theta = projected * right;
1422 float sin_theta = projected * forward;
1423 theta = atan2f(sin_theta, cos_theta);
1424 if (theta < 0.0f)
1425 theta += 2.0f * M_PI; // Ensure [0, 2π]
1426 } else {
1427 theta = 0.0f; // Vector is along cone axis
1428 }
1429
1430 return distance;
1431}
1432
1433bool CollisionDetection::sphericalCoordsToBinIndices(float theta, float phi, const AngularBins &bins, int &theta_bin, int &phi_bin) {
1434 // Check if angles are within valid cone range
1435 if (phi < 0.0f || theta < 0.0f || theta >= 2.0f * M_PI) {
1436 return false;
1437 }
1438
1439 // Map to bin indices
1440 theta_bin = (int) (theta * bins.theta_divisions / (2.0f * M_PI));
1441 phi_bin = (int) (phi * bins.phi_divisions / M_PI); // Assume phi range is [0, PI] for full sphere
1442
1443 // Clamp to valid ranges
1444 theta_bin = std::clamp(theta_bin, 0, bins.theta_divisions - 1);
1445 phi_bin = std::clamp(phi_bin, 0, bins.phi_divisions - 1);
1446
1447 return true;
1448}
1449
1450void CollisionDetection::projectGeometryToBins(const Cone &cone, const std::vector<uint> &filtered_uuids, AngularBins &bins) {
1451 bins.clear();
1452
1453 const int PARALLEL_THRESHOLD = 500; // Empirically determined threshold
1454
1455 if (filtered_uuids.size() > PARALLEL_THRESHOLD) {
1456// Conditional parallel projection for large geometry sets
1457#ifdef _OPENMP
1458 const int num_threads = omp_get_max_threads();
1459 std::vector<AngularBins> thread_bins(num_threads, AngularBins(bins.theta_divisions, bins.phi_divisions));
1460
1461#pragma omp parallel
1462 {
1463 int thread_id = omp_get_thread_num();
1464 AngularBins &local_bins = thread_bins[thread_id];
1465
1466#pragma omp for nowait
1467 for (int i = 0; i < static_cast<int>(filtered_uuids.size()); i++) {
1468 uint uuid = filtered_uuids[i];
1469
1470 if (context->doesPrimitiveExist(uuid)) {
1471 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1472
1473 // Project each vertex to spherical coordinates
1474 for (const vec3 &vertex: vertices) {
1475 vec3 apex_to_vertex = vertex - cone.apex;
1476 float distance = apex_to_vertex.magnitude();
1477
1478 if (distance > 1e-6f) {
1479 float theta, phi;
1480 cartesianToSphericalCone(apex_to_vertex, cone.axis, theta, phi);
1481
1482 // Skip if outside cone angle
1483 if (phi <= cone.half_angle) {
1484 int theta_bin, phi_bin;
1485 if (sphericalCoordsToBinIndices(theta, phi, local_bins, theta_bin, phi_bin)) {
1486 local_bins.setCovered(theta_bin, phi_bin, distance);
1487 }
1488 }
1489 }
1490 }
1491 }
1492 }
1493 }
1494
1495 // Merge thread-local bins into global bins
1496 for (const auto &thread_bin: thread_bins) {
1497 for (int theta = 0; theta < bins.theta_divisions; theta++) {
1498 for (int phi = 0; phi < bins.phi_divisions; phi++) {
1499 if (thread_bin.isCovered(theta, phi)) {
1500 int index = theta * bins.phi_divisions + phi;
1501 float thread_depth = thread_bin.depth_values[index];
1502 bins.setCovered(theta, phi, thread_depth);
1503 }
1504 }
1505 }
1506 }
1507#else
1508 // Fallback to serial if OpenMP not available
1509 projectGeometryToBinsSerial(cone, filtered_uuids, bins);
1510#endif
1511 } else {
1512 // Serial projection for small geometry sets
1513 projectGeometryToBinsSerial(cone, filtered_uuids, bins);
1514 }
1515}
1516
1517void CollisionDetection::projectGeometryToBinsSerial(const Cone &cone, const std::vector<uint> &filtered_uuids, AngularBins &bins) {
1518 for (uint uuid: filtered_uuids) {
1519 if (context->doesPrimitiveExist(uuid)) {
1520 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
1521
1522 // Project each vertex to spherical coordinates
1523 for (const vec3 &vertex: vertices) {
1524 vec3 apex_to_vertex = vertex - cone.apex;
1525 float distance = apex_to_vertex.magnitude();
1526
1527 if (distance > 1e-6f) {
1528 float theta, phi;
1529 cartesianToSphericalCone(apex_to_vertex, cone.axis, theta, phi);
1530
1531 // Skip if outside cone angle
1532 if (phi <= cone.half_angle) {
1533 int theta_bin, phi_bin;
1534 if (sphericalCoordsToBinIndices(theta, phi, bins, theta_bin, phi_bin)) {
1535 bins.setCovered(theta_bin, phi_bin, distance);
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542}
1543
1544std::vector<CollisionDetection::Gap> CollisionDetection::findGapsInCoverageMap(const AngularBins &bins, const Cone &cone) {
1545 std::vector<Gap> gaps;
1546
1547 // Connected component analysis to find contiguous free regions
1548 std::vector<std::vector<bool>> visited(bins.theta_divisions, std::vector<bool>(bins.phi_divisions, false));
1549
1550 for (int theta = 0; theta < bins.theta_divisions; theta++) {
1551 for (int phi = 0; phi < bins.phi_divisions; phi++) {
1552 if (!bins.isCovered(theta, phi) && !visited[theta][phi]) {
1553 // Found start of new gap - flood fill to find full extent
1554 Gap gap = floodFillGap(bins, theta, phi, visited, cone);
1555
1556 // Only keep gaps meeting minimum size thresholds
1557 const float MIN_GAP_SIZE_STERADIANS = 0.01f; // Minimum gap size
1558 if (gap.angular_size > MIN_GAP_SIZE_STERADIANS) {
1559 gaps.push_back(gap);
1560 }
1561 }
1562 }
1563 }
1564
1565 // Sort gaps by angular size (largest first)
1566 std::sort(gaps.begin(), gaps.end(), [](const Gap &a, const Gap &b) { return a.angular_size > b.angular_size; });
1567
1568 return gaps;
1569}
1570
1571CollisionDetection::Gap CollisionDetection::floodFillGap(const AngularBins &bins, int start_theta, int start_phi, std::vector<std::vector<bool>> &visited, const Cone &cone) {
1572 Gap gap;
1573 std::queue<std::pair<int, int>> queue;
1574 queue.push({start_theta, start_phi});
1575
1576 float total_solid_angle = 0;
1577 vec3 weighted_center(0, 0, 0);
1578
1579 while (!queue.empty()) {
1580 auto [theta, phi] = queue.front();
1581 queue.pop();
1582
1583 if (visited[theta][phi] || bins.isCovered(theta, phi))
1584 continue;
1585 visited[theta][phi] = true;
1586
1587 // Calculate solid angle contribution of this bin
1588 float bin_solid_angle = calculateBinSolidAngle(theta, phi, bins, cone.half_angle);
1589 total_solid_angle += bin_solid_angle;
1590
1591 // Accumulate weighted center direction
1592 vec3 bin_direction = binIndicesToCartesian(theta, phi, bins, cone);
1593 weighted_center = weighted_center + bin_direction * bin_solid_angle;
1594
1595 // Add unoccupied neighbors to queue
1596 addUnoccupiedNeighbors(theta, phi, bins, visited, queue);
1597 }
1598
1599 gap.angular_size = total_solid_angle;
1600 gap.center_direction = weighted_center.magnitude() > 1e-6f ? weighted_center.normalize() : cone.axis;
1601
1602 return gap;
1603}
1604
1605float CollisionDetection::calculateBinSolidAngle(int theta_bin, int phi_bin, const AngularBins &bins, float cone_half_angle) {
1606 // Calculate solid angle of a single bin
1607 float theta_step = 2.0f * M_PI / bins.theta_divisions;
1608 float phi_step = cone_half_angle / bins.phi_divisions;
1609
1610 // For small angles, solid angle ≈ θ_step × φ_step × sin(φ)
1611 float phi = (phi_bin + 0.5f) * phi_step;
1612 return theta_step * phi_step * sinf(phi);
1613}
1614
1615vec3 CollisionDetection::binIndicesToCartesian(int theta_bin, int phi_bin, const AngularBins &bins, const Cone &cone) {
1616 // Convert bin indices back to cartesian direction
1617 float theta = (theta_bin + 0.5f) * 2.0f * M_PI / bins.theta_divisions;
1618 float phi = (phi_bin + 0.5f) * cone.half_angle / bins.phi_divisions;
1619
1620 // Create orthonormal basis from cone axis (same as in cartesianToSphericalCone)
1621 vec3 up = (abs(cone.axis.z) < 0.999f) ? make_vec3(0, 0, 1) : make_vec3(1, 0, 0);
1622 vec3 right = cross(cone.axis, up);
1623 right.normalize();
1624 vec3 forward = cross(right, cone.axis);
1625
1626 // Convert spherical to cartesian
1627 float sin_phi = sinf(phi);
1628 float cos_phi = cosf(phi);
1629 float sin_theta = sinf(theta);
1630 float cos_theta = cosf(theta);
1631
1632 vec3 direction = cone.axis * cos_phi + (right * cos_theta + forward * sin_theta) * sin_phi;
1633 return direction.normalize();
1634}
1635
1636void CollisionDetection::addUnoccupiedNeighbors(int theta, int phi, const AngularBins &bins, std::vector<std::vector<bool>> &visited, std::queue<std::pair<int, int>> &queue) {
1637 // Add 4-connected neighbors (up, down, left, right in bin space)
1638 const int neighbors[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
1639
1640 for (int i = 0; i < 4; i++) {
1641 int new_theta = theta + neighbors[i][0];
1642 int new_phi = phi + neighbors[i][1];
1643
1644 // Handle theta wraparound (circular)
1645 if (new_theta < 0)
1646 new_theta += bins.theta_divisions;
1647 if (new_theta >= bins.theta_divisions)
1648 new_theta -= bins.theta_divisions;
1649
1650 // Check phi bounds (no wraparound)
1651 if (new_phi >= 0 && new_phi < bins.phi_divisions) {
1652 if (!visited[new_theta][new_phi] && !bins.isCovered(new_theta, new_phi)) {
1653 queue.push({new_theta, new_phi});
1654 }
1655 }
1656 }
1657}
1658
1659// -------- MAIN RASTERIZED FINDOPTIMALCONEPATH IMPLEMENTATION --------
1660
1661CollisionDetection::OptimalPathResult CollisionDetection::findOptimalConePath(const vec3 &apex, const vec3 &centralAxis, float half_angle, float height, int initialSamples) {
1662
1663 OptimalPathResult result;
1664 result.direction = centralAxis;
1665 result.direction.normalize();
1666 result.collisionCount = 0;
1667 result.confidence = 0.0f;
1668
1669 // Validate input parameters
1670 if (initialSamples <= 0 || half_angle <= 0.0f || half_angle > M_PI) {
1671 if (printmessages) {
1672 std::cerr << "WARNING: Invalid parameters for findOptimalConePath" << std::endl;
1673 }
1674 return result;
1675 }
1676
1677 if (bvh_nodes.empty()) {
1678 // No geometry to collide with, central axis is optimal
1679 result.confidence = 1.0f;
1680 return result;
1681 }
1682
1683 // Use original fish-eye camera gap detection algorithm
1684 std::vector<Gap> detected_gaps = detectGapsInCone(apex, centralAxis, half_angle, height, initialSamples);
1685
1686 if (detected_gaps.empty()) {
1687 // No gaps found, fall back to central axis
1688 // if (printmessages) {
1689 // std::cerr << "WARNING: No gaps detected in cone, using central axis" << std::endl;
1690 // }
1691 result.confidence = 0.1f;
1692 return result;
1693 }
1694
1695 // Score gaps using fish-eye metric
1696 scoreGapsByFishEyeMetric(detected_gaps, centralAxis);
1697
1698 // Find optimal direction toward highest-scoring gap
1699 result.direction = findOptimalGapDirection(detected_gaps, centralAxis);
1700
1701 // Count collisions along optimal direction for reporting using modern ray-tracing
1702 float max_distance = (height > 0.0f) ? height : -1.0f;
1703 HitResult direction_hit = castRay(apex, result.direction, max_distance);
1704 result.collisionCount = direction_hit.hit ? 1 : 0;
1705
1706 // Calculate confidence based on gap quality
1707 if (!detected_gaps.empty()) {
1708 // Higher confidence for larger, well-defined gaps
1709 const Gap &best_gap = detected_gaps[0]; // Assuming first is best after sorting
1710 result.confidence = std::min(1.0f, best_gap.angular_size * 10.0f); // Scale angular size to confidence
1711 }
1712
1713 return result;
1714}
1715
1716#ifdef HELIOS_CUDA_AVAILABLE
1717void CollisionDetection::allocateGPUMemory() {
1718 if (gpu_memory_allocated) {
1719 freeGPUMemory(); // Clean up existing allocation
1720 }
1721
1722 if (bvh_nodes.empty() || primitive_indices.empty()) {
1723 return; // Nothing to allocate
1724 }
1725
1726 // Initialize pointers to nullptr for safety
1727 d_bvh_nodes = nullptr;
1728 d_primitive_indices = nullptr;
1729
1730 // Check if CUDA is actually available at runtime
1731 int deviceCount = 0;
1732 cudaError_t err = cudaGetDeviceCount(&deviceCount);
1733 if (err != cudaSuccess || deviceCount == 0) {
1734 // CUDA not available - disable GPU acceleration and fall back to CPU
1735 if (printmessages) {
1736 std::cout << "WARNING (CollisionDetection::allocateGPUMemory): CUDA runtime unavailable (" << cudaGetErrorString(err) << "). Falling back to CPU-only mode." << std::endl;
1737 }
1738 gpu_acceleration_enabled = false;
1739 return;
1740 }
1741
1742 // Calculate sizes
1743 size_t bvh_size = bvh_nodes.size() * sizeof(GPUBVHNode);
1744 size_t indices_size = primitive_indices.size() * sizeof(uint);
1745
1746 // Validate sizes are reasonable
1747 if (bvh_size == 0 || indices_size == 0) {
1748 helios_runtime_error("ERROR: Invalid BVH or primitive data sizes for GPU allocation");
1749 }
1750
1751 // Allocate BVH nodes
1752 err = cudaMalloc(&d_bvh_nodes, bvh_size);
1753 if (err != cudaSuccess) {
1754 // GPU allocation failed - fall back to CPU instead of crashing
1755 if (printmessages) {
1756 std::cout << "WARNING (CollisionDetection::allocateGPUMemory): Failed to allocate GPU memory (" << cudaGetErrorString(err) << "). Falling back to CPU-only mode." << std::endl;
1757 }
1758 gpu_acceleration_enabled = false;
1759 return;
1760 }
1761
1762 // Allocate primitive indices
1763 err = cudaMalloc((void **) &d_primitive_indices, indices_size);
1764 if (err != cudaSuccess) {
1765 // GPU allocation failed - clean up and fall back to CPU
1766 cudaFree(d_bvh_nodes);
1767 d_bvh_nodes = nullptr;
1768 if (printmessages) {
1769 std::cout << "WARNING (CollisionDetection::allocateGPUMemory): Failed to allocate primitive indices (" << cudaGetErrorString(err) << "). Falling back to CPU-only mode." << std::endl;
1770 }
1771 gpu_acceleration_enabled = false;
1772 return;
1773 }
1774
1775 // Mark as allocated only after both allocations succeeded
1776 gpu_memory_allocated = true;
1777}
1778#endif
1779
1780#ifdef HELIOS_CUDA_AVAILABLE
1781void CollisionDetection::freeGPUMemory() {
1782 if (!gpu_memory_allocated)
1783 return;
1784
1785 if (d_bvh_nodes) {
1786 cudaFree(d_bvh_nodes);
1787 d_bvh_nodes = nullptr;
1788 }
1789
1790 if (d_primitive_indices) {
1791 cudaFree(d_primitive_indices);
1792 d_primitive_indices = nullptr;
1793 }
1794
1795 gpu_memory_allocated = false;
1796}
1797#endif
1798
1799#ifdef HELIOS_CUDA_AVAILABLE
1800void CollisionDetection::transferBVHToGPU() {
1801 if (!gpu_acceleration_enabled || bvh_nodes.empty()) {
1802 return;
1803 }
1804
1805 // Always reallocate GPU memory to handle size changes
1806 if (gpu_memory_allocated) {
1807 freeGPUMemory();
1808 }
1809 allocateGPUMemory();
1810
1811 // Re-check if GPU acceleration is still enabled after allocation attempt
1812 // allocateGPUMemory() may have disabled it due to lack of GPU hardware
1813 if (!gpu_acceleration_enabled) {
1814 return; // Gracefully fall back to CPU mode
1815 }
1816
1817 // Verify allocation succeeded
1818 if (!gpu_memory_allocated || d_bvh_nodes == nullptr || d_primitive_indices == nullptr) {
1819 helios_runtime_error("ERROR: Failed to allocate GPU memory for BVH transfer");
1820 }
1821
1822 // Convert CPU BVH to GPU format
1823 std::vector<GPUBVHNode> gpu_nodes(bvh_nodes.size());
1824 for (size_t i = 0; i < bvh_nodes.size(); i++) {
1825 const BVHNode &cpu_node = bvh_nodes[i];
1826 GPUBVHNode &gpu_node = gpu_nodes[i];
1827
1828 gpu_node.aabb_min = heliosVecToFloat3(cpu_node.aabb_min);
1829 gpu_node.aabb_max = heliosVecToFloat3(cpu_node.aabb_max);
1830 gpu_node.left_child = cpu_node.left_child;
1831 gpu_node.right_child = cpu_node.right_child;
1832 gpu_node.primitive_start = cpu_node.primitive_start;
1833 gpu_node.primitive_count = cpu_node.primitive_count;
1834 gpu_node.is_leaf = cpu_node.is_leaf ? 1 : 0;
1835 gpu_node.padding = 0;
1836 }
1837
1838 // Transfer to GPU
1839 cudaError_t err = cudaMemcpy(d_bvh_nodes, gpu_nodes.data(), gpu_nodes.size() * sizeof(GPUBVHNode), cudaMemcpyHostToDevice);
1840 if (err != cudaSuccess) {
1841 helios_runtime_error("CUDA error transferring BVH nodes: " + std::string(cudaGetErrorString(err)));
1842 }
1843
1844 err = cudaMemcpy(d_primitive_indices, primitive_indices.data(), primitive_indices.size() * sizeof(uint), cudaMemcpyHostToDevice);
1845 if (err != cudaSuccess) {
1846 helios_runtime_error("CUDA error transferring primitive indices: " + std::string(cudaGetErrorString(err)));
1847 }
1848}
1849#endif
1850
1851void CollisionDetection::markBVHDirty() {
1852 // Clear internal tracking so BVH will be rebuilt on next access
1853 last_processed_uuids.clear();
1854 last_processed_deleted_uuids.clear();
1855 last_bvh_geometry.clear();
1856 bvh_dirty = true;
1857
1858 // Note: Don't clear primitive_cache here - it will be cleared only when
1859 // buildBVH() detects actual primitive set changes, not just geometry updates
1860
1861 // Free GPU memory since BVH will be rebuilt
1862#ifdef HELIOS_CUDA_AVAILABLE
1863 freeGPUMemory();
1864#endif
1865}
1866
1867void CollisionDetection::incrementalUpdateBVH(const std::set<uint> &added_geometry, const std::set<uint> &removed_geometry, const std::set<uint> &final_geometry) {
1868
1869 // Validate new geometries exist first
1870 for (uint uuid: added_geometry) {
1871 if (!context->doesPrimitiveExist(uuid)) {
1872 if (printmessages) {
1873 std::cerr << "Warning: Added primitive " << uuid << " does not exist, falling back to full rebuild" << std::endl;
1874 }
1875 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
1876 buildBVH(final_primitives);
1877 return;
1878 }
1879 }
1880
1881 // For plant growth scenarios, most changes are additions (new leaves/branches)
1882 // We can optimize for this by caching primitive AABBs and selective reconstruction
1883
1884 // Update primitive AABB cache for new primitives
1885 for (uint uuid: added_geometry) {
1886 updatePrimitiveAABBCache(uuid);
1887 }
1888
1889 // Remove old primitives from cache
1890 for (uint uuid: removed_geometry) {
1891 primitive_aabbs_cache.erase(uuid);
1892 }
1893
1894 // For incremental updates, we use a two-stage approach:
1895 // 1. If the number of changes is small relative to tree size, do targeted insertion
1896 // 2. Otherwise, do optimized rebuild with cached AABBs
1897
1898 size_t total_changes = added_geometry.size() + removed_geometry.size();
1899 size_t current_size = final_geometry.size();
1900
1901 // If changes are very small (<5% of tree), try targeted insertion
1902 if (current_size > 0 && !bvh_nodes.empty() && (float(total_changes) / float(current_size)) < 0.05f) {
1903 // For very small changes, targeted insertion can be beneficial
1904 bool insertion_successful = true;
1905
1906 // Remove primitives from primitive_indices
1907 if (!removed_geometry.empty()) {
1908 primitive_indices.erase(std::remove_if(primitive_indices.begin(), primitive_indices.end(), [&removed_geometry](uint uuid) { return removed_geometry.find(uuid) != removed_geometry.end(); }), primitive_indices.end());
1909 }
1910
1911 // Add new primitives to primitive_indices
1912 for (uint uuid: added_geometry) {
1913 primitive_indices.push_back(uuid);
1914 }
1915
1916 // For small changes, rebuild only affected subtrees by invalidating nodes
1917 // This is more efficient than full rebuild for tiny changes
1918 if (insertion_successful && total_changes < 50) {
1919 if (printmessages) {
1920 std::cout << "Using targeted tree update for " << total_changes << " changes" << std::endl;
1921 }
1922
1923 // Mark BVH as needing rebalance but keep existing structure where possible
1924 // For now, we do a fast rebuild since true incremental tree rebalancing
1925 // requires complex algorithms that may not be worth the complexity
1926 optimizedRebuildBVH(final_geometry);
1927 return;
1928 }
1929 }
1930
1931 // Fall back to optimized full rebuild using cached AABBs
1932 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
1933 buildBVH(final_primitives);
1934
1935 // Update tracking
1936 last_bvh_geometry = final_geometry;
1937 bvh_dirty = false;
1938 soa_dirty = true; // SoA needs rebuild after BVH change
1939}
1940
1941bool CollisionDetection::validateUUIDs(const std::vector<uint> &UUIDs) const {
1943 warnings.setEnabled(printmessages);
1944
1945 bool all_valid = true;
1946 for (uint UUID: UUIDs) {
1947 if (!context->doesPrimitiveExist(UUID)) {
1948 warnings.addWarning("primitive_uuid_not_exist", "Primitive UUID " + std::to_string(UUID) + " does not exist - skipping");
1949 all_valid = false;
1950 }
1951 }
1952
1953 warnings.report(std::cerr);
1954 return all_valid;
1955}
1956
1957bool CollisionDetection::rayPrimitiveIntersection(const vec3 &origin, const vec3 &direction, uint primitive_UUID, float &distance) const {
1958 // Check if primitive exists first
1959 if (!context->doesPrimitiveExist(primitive_UUID)) {
1960 return false;
1961 }
1962
1963 try {
1964 // Get primitive type and vertices
1965 PrimitiveType type = context->getPrimitiveType(primitive_UUID);
1966 std::vector<vec3> vertices = context->getPrimitiveVertices(primitive_UUID);
1967
1968 if (vertices.empty()) {
1969 return false;
1970 }
1971
1972
1973 bool hit = false;
1974 float min_distance = std::numeric_limits<float>::max();
1975
1976 if (type == PRIMITIVE_TYPE_TRIANGLE) {
1977 // Triangle intersection using radiation model algorithm (proven to work)
1978 if (vertices.size() >= 3) {
1979 const vec3 &v0 = vertices[0];
1980 const vec3 &v1 = vertices[1];
1981 const vec3 &v2 = vertices[2];
1982
1983 // Use the same algorithm as radiation model's triangle_intersect
1984 float a = v0.x - v1.x, b = v0.x - v2.x, c = direction.x, d = v0.x - origin.x;
1985 float e = v0.y - v1.y, f = v0.y - v2.y, g = direction.y, h = v0.y - origin.y;
1986 float i = v0.z - v1.z, j = v0.z - v2.z, k = direction.z, l = v0.z - origin.z;
1987
1988 float m = f * k - g * j, n = h * k - g * l, p = f * l - h * j;
1989 float q = g * i - e * k, s = e * j - f * i;
1990
1991 float denom = a * m + b * q + c * s;
1992 if (std::abs(denom) < 1e-8f) {
1993 return false; // Ray is parallel to triangle
1994 }
1995
1996 float inv_denom = 1.0f / denom;
1997
1998 float e1 = d * m - b * n - c * p;
1999 float beta = e1 * inv_denom;
2000
2001 if (beta >= 0.0f) {
2002 float r = e * l - h * i;
2003 float e2 = a * n + d * q + c * r;
2004 float gamma = e2 * inv_denom;
2005
2006 if (gamma >= 0.0f && beta + gamma <= 1.0f) {
2007 float e3 = a * p - b * r + d * s;
2008 float t = e3 * inv_denom;
2009
2010 if (t > 1e-8f && t < min_distance) {
2011 min_distance = t;
2012 hit = true;
2013 }
2014 }
2015 }
2016 }
2017 } else if (type == PRIMITIVE_TYPE_PATCH) {
2018 // Patch (quadrilateral) intersection using radiation model algorithm
2019 if (vertices.size() >= 4) {
2020 const vec3 &v0 = vertices[0];
2021 const vec3 &v1 = vertices[1];
2022 const vec3 &v2 = vertices[2];
2023 const vec3 &v3 = vertices[3];
2024
2025 // Calculate patch vectors and normal (same as radiation model)
2026 vec3 anchor = v0;
2027 vec3 normal = cross(v1 - v0, v2 - v0);
2028 normal.normalize();
2029
2030 vec3 a = v1 - v0; // First edge vector
2031 vec3 b = v3 - v0; // Second edge vector
2032
2033 // Ray-plane intersection
2034 float denom = direction * normal;
2035 if (std::abs(denom) > 1e-8f) { // Not parallel to plane
2036 float t = (anchor - origin) * normal / denom;
2037
2038 if (t > 1e-8f && t < 1e8f) { // Valid intersection distance
2039 // Find intersection point
2040 vec3 p = origin + direction * t;
2041 vec3 d = p - anchor;
2042
2043 // Project onto patch coordinate system
2044 float ddota = d * a;
2045 float ddotb = d * b;
2046
2047 // Check if point is within patch bounds
2048 if (ddota >= 0.0f && ddota <= (a * a) && ddotb >= 0.0f && ddotb <= (b * b)) {
2049
2050 if (t < min_distance) {
2051 min_distance = t;
2052 hit = true;
2053 }
2054 }
2055 }
2056 }
2057 }
2058 } else if (type == PRIMITIVE_TYPE_VOXEL) {
2059 // Voxel (AABB) intersection using slab method
2060 if (vertices.size() == 8) {
2061 // Calculate AABB from 8 vertices
2062 vec3 aabb_min = vertices[0];
2063 vec3 aabb_max = vertices[0];
2064
2065 for (int i = 1; i < 8; i++) {
2066 aabb_min.x = std::min(aabb_min.x, vertices[i].x);
2067 aabb_min.y = std::min(aabb_min.y, vertices[i].y);
2068 aabb_min.z = std::min(aabb_min.z, vertices[i].z);
2069 aabb_max.x = std::max(aabb_max.x, vertices[i].x);
2070 aabb_max.y = std::max(aabb_max.y, vertices[i].y);
2071 aabb_max.z = std::max(aabb_max.z, vertices[i].z);
2072 }
2073
2074 // Ray-AABB intersection using slab method
2075 float t_near = -std::numeric_limits<float>::max();
2076 float t_far = std::numeric_limits<float>::max();
2077
2078 // Check intersection with each slab (X, Y, Z)
2079 for (int i = 0; i < 3; i++) {
2080 float ray_dir_component = (i == 0) ? direction.x : (i == 1) ? direction.y : direction.z;
2081 float ray_orig_component = (i == 0) ? origin.x : (i == 1) ? origin.y : origin.z;
2082 float aabb_min_component = (i == 0) ? aabb_min.x : (i == 1) ? aabb_min.y : aabb_min.z;
2083 float aabb_max_component = (i == 0) ? aabb_max.x : (i == 1) ? aabb_max.y : aabb_max.z;
2084
2085 if (std::abs(ray_dir_component) < 1e-8f) {
2086 // Ray is parallel to slab
2087 if (ray_orig_component < aabb_min_component || ray_orig_component > aabb_max_component) {
2088 return false; // Ray is outside slab and parallel - no intersection
2089 }
2090 } else {
2091 // Calculate intersection distances for this slab
2092 float t1 = (aabb_min_component - ray_orig_component) / ray_dir_component;
2093 float t2 = (aabb_max_component - ray_orig_component) / ray_dir_component;
2094
2095 // Ensure t1 <= t2
2096 if (t1 > t2) {
2097 std::swap(t1, t2);
2098 }
2099
2100 // Update near and far intersection distances
2101 t_near = std::max(t_near, t1);
2102 t_far = std::min(t_far, t2);
2103
2104 // Early exit if no intersection possible
2105 if (t_near > t_far) {
2106 return false;
2107 }
2108 }
2109 }
2110
2111 // Check if intersection is in front of ray origin
2112 if (t_far >= 0.0f && t_near < min_distance) {
2113 // Use t_near if it's positive (ray starts outside box), otherwise t_far (ray starts inside box)
2114 float intersection_distance = (t_near >= 1e-8f) ? t_near : t_far;
2115 if (intersection_distance >= 1e-8f) {
2116 min_distance = intersection_distance;
2117 hit = true;
2118 }
2119 }
2120 }
2121 }
2122
2123 if (hit) {
2124 distance = min_distance;
2125 return true;
2126 }
2127
2128 return false;
2129 } catch (const std::exception &e) {
2130 // Primitive no longer exists or can't be accessed
2131 return false;
2132 }
2133}
2134
2135void CollisionDetection::calculateGridIntersection(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions, const std::vector<uint> &UUIDs) {
2136 // Use slicePrimitivesUsingGrid to populate grid_cells
2137 std::vector<uint> uuids_to_process = UUIDs.empty() ? context->getAllUUIDs() : UUIDs;
2138
2139 // Filter to only non-voxel primitives
2140 std::vector<uint> planar_primitives;
2141 for (uint uuid: uuids_to_process) {
2142 if (context->getPrimitiveType(uuid) != PRIMITIVE_TYPE_VOXEL) {
2143 planar_primitives.push_back(uuid);
2144 }
2145 }
2146
2147 // This will populate grid_cells
2148 slicePrimitivesUsingGrid(planar_primitives, grid_center, grid_size, grid_divisions);
2149}
2150
2151std::vector<std::vector<std::vector<std::vector<uint>>>> CollisionDetection::getGridCells() {
2152 return grid_cells;
2153}
2154
2155std::vector<uint> CollisionDetection::getGridIntersections(int i, int j, int k) {
2156 if (i < 0 || i >= static_cast<int>(grid_cells.size()) || j < 0 || j >= static_cast<int>(grid_cells[i].size()) || k < 0 || k >= static_cast<int>(grid_cells[i][j].size())) {
2157 helios_runtime_error("ERROR (CollisionDetection::getGridIntersections): Grid indices out of bounds");
2158 }
2159 return grid_cells[i][j][k];
2160}
2161
2162int CollisionDetection::optimizeLayout(const std::vector<uint> &UUIDs, float learning_rate, int max_iterations) {
2163 if (printmessages) {
2164 std::cerr << "WARNING: optimizeLayout not yet implemented" << std::endl;
2165 }
2166 return 0;
2167}
2168
2169int CollisionDetection::countRayIntersections(const vec3 &origin, const vec3 &direction, float max_distance) {
2170
2171 int intersection_count = 0;
2172
2173 if (bvh_nodes.empty()) {
2174 return intersection_count;
2175 }
2176
2177 // OPTIMIZATION: Minimum distance threshold to avoid self-intersection with nearby geometry
2178 // This prevents plant's own geometry (shoot tips, etc.) from occluding the entire cone view
2179 float min_distance = 0.05f; // 5cm minimum distance - ignore intersections closer than this
2180
2181 // Ensure the BVH is current before traversal
2182 const_cast<CollisionDetection *>(this)->ensureBVHCurrent();
2183
2184 // Stack-based traversal to avoid recursion
2185 std::vector<uint> node_stack;
2186 node_stack.push_back(0); // Start with root node
2187
2188 while (!node_stack.empty()) {
2189 uint node_idx = node_stack.back();
2190 node_stack.pop_back();
2191
2192 if (node_idx >= bvh_nodes.size())
2193 continue;
2194
2195 const BVHNode &node = bvh_nodes[node_idx];
2196
2197 // Test if ray intersects node AABB
2198 float t_min, t_max;
2199 if (!rayAABBIntersect(origin, direction, node.aabb_min, node.aabb_max, t_min, t_max)) {
2200 continue;
2201 }
2202
2203 // Check if intersection is within distance range (both min and max)
2204 if (t_max < min_distance) {
2205 continue; // Entire AABB is too close - skip
2206 }
2207 if (max_distance > 0.0f && t_min > max_distance) {
2208 continue; // Entire AABB is too far - skip
2209 }
2210
2211 if (node.is_leaf) {
2212 // Check each primitive in this leaf for ray intersection
2213 for (uint i = 0; i < node.primitive_count; i++) {
2214 uint primitive_id = primitive_indices[node.primitive_start + i];
2215
2216 // Get this primitive's AABB
2217 if (!context->doesPrimitiveExist(primitive_id)) {
2218 continue; // Skip invalid primitive
2219 }
2220 vec3 prim_min, prim_max;
2221 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
2222
2223 // Test ray against primitive AABB
2224 float prim_t_min, prim_t_max;
2225 if (rayAABBIntersect(origin, direction, prim_min, prim_max, prim_t_min, prim_t_max)) {
2226 // Check distance constraints (both min and max)
2227 bool within_min_distance = prim_t_min >= min_distance;
2228 bool within_max_distance = (max_distance <= 0.0f) || (prim_t_min <= max_distance);
2229
2230 if (within_min_distance && within_max_distance) {
2231 intersection_count++;
2232 }
2233 }
2234 }
2235 } else {
2236 // Add child nodes to stack for further traversal
2237 if (node.left_child != 0xFFFFFFFF) {
2238 node_stack.push_back(node.left_child);
2239 }
2240 if (node.right_child != 0xFFFFFFFF) {
2241 node_stack.push_back(node.right_child);
2242 }
2243 }
2244 }
2245
2246 return intersection_count;
2247}
2248
2249bool CollisionDetection::findNearestRayIntersection(const vec3 &origin, const vec3 &direction, const std::set<uint> &candidate_UUIDs, float &nearest_distance, float max_distance) {
2250
2251 nearest_distance = std::numeric_limits<float>::max();
2252 bool found_intersection = false;
2253
2254 // Check if we need to traverse both static and dynamic BVHs
2255 bool check_static_bvh = hierarchical_bvh_enabled && static_bvh_valid && !static_bvh_nodes.empty();
2256 bool check_dynamic_bvh = !bvh_nodes.empty();
2257
2258 if (!check_static_bvh && !check_dynamic_bvh) {
2259 return false;
2260 }
2261
2262
2263 // Ensure the BVH is current before traversal
2264 const_cast<CollisionDetection *>(this)->ensureBVHCurrent();
2265
2266 // Lambda function to traverse a BVH and find ray intersections
2267 auto traverseBVH = [&](const std::vector<BVHNode> &nodes, const std::vector<uint> &primitives, const char *bvh_name) {
2268 if (nodes.empty())
2269 return;
2270
2271 // Stack-based traversal to avoid recursion
2272 std::vector<uint> node_stack;
2273 node_stack.push_back(0); // Start with root node
2274
2275 while (!node_stack.empty()) {
2276 uint node_idx = node_stack.back();
2277 node_stack.pop_back();
2278
2279 if (node_idx >= nodes.size()) {
2280 continue;
2281 }
2282
2283 const BVHNode &node = nodes[node_idx];
2284
2285 // Test if ray intersects node AABB
2286 float t_min, t_max;
2287 if (!rayAABBIntersect(origin, direction, node.aabb_min, node.aabb_max, t_min, t_max)) {
2288 continue;
2289 }
2290
2291 // Check if intersection is within distance range
2292 if (max_distance > 0.0f && t_min > max_distance) {
2293 continue; // Entire AABB is too far - skip
2294 }
2295
2296 // If we've already found a closer intersection than this AABB, skip it
2297 if (t_min > nearest_distance) {
2298 continue;
2299 }
2300
2301 if (node.is_leaf) {
2302 // Check each primitive in this leaf for ray intersection
2303 for (uint i = 0; i < node.primitive_count; i++) {
2304 uint primitive_id = primitives[node.primitive_start + i];
2305
2306
2307 // Skip if this primitive is not in the candidate set (unless candidate set is empty)
2308 if (!candidate_UUIDs.empty() && candidate_UUIDs.find(primitive_id) == candidate_UUIDs.end()) {
2309 continue;
2310 }
2311
2312
2313 // Get this primitive's AABB
2314 if (!context->doesPrimitiveExist(primitive_id)) {
2315 continue; // Skip invalid primitive
2316 }
2317
2318 vec3 prim_min, prim_max;
2319 context->getPrimitiveBoundingBox(primitive_id, prim_min, prim_max);
2320
2321 // Test ray against primitive AABB
2322 float prim_t_min, prim_t_max;
2323 if (rayAABBIntersect(origin, direction, prim_min, prim_max, prim_t_min, prim_t_max)) {
2324 // Check distance constraints
2325 bool within_max_distance = (max_distance <= 0.0f) || (prim_t_min <= max_distance);
2326
2327 if (within_max_distance && prim_t_min > 0.0f && prim_t_min < nearest_distance) {
2328 // For now, we use AABB intersection distance as an approximation
2329 // A more accurate implementation would perform exact ray-primitive intersection
2330 nearest_distance = prim_t_min;
2331 found_intersection = true;
2332 }
2333 }
2334 }
2335 } else {
2336 // Add child nodes to stack for further traversal
2337 if (node.left_child != 0xFFFFFFFF) {
2338 node_stack.push_back(node.left_child);
2339 }
2340 if (node.right_child != 0xFFFFFFFF) {
2341 node_stack.push_back(node.right_child);
2342 }
2343 }
2344 } // End of while loop
2345 }; // End of lambda
2346
2347 // First, traverse the static BVH if hierarchical BVH is enabled
2348 if (check_static_bvh) {
2349 traverseBVH(static_bvh_nodes, static_bvh_primitives, "static");
2350 }
2351
2352 // Then, traverse the dynamic BVH
2353 if (check_dynamic_bvh) {
2354 traverseBVH(bvh_nodes, primitive_indices, "dynamic");
2355 }
2356
2357 return found_intersection;
2358}
2359
2360bool CollisionDetection::findNearestPrimitiveDistance(const vec3 &origin, const vec3 &direction, const std::vector<uint> &candidate_UUIDs, float &distance, vec3 &obstacle_direction) {
2361
2363 warnings.setEnabled(printmessages);
2364
2365 if (candidate_UUIDs.empty()) {
2366 warnings.addWarning("no_candidate_uuids", "No candidate UUIDs provided");
2367 warnings.report(std::cerr);
2368 return false;
2369 }
2370
2371 // Validate that direction is normalized
2372 float dir_magnitude = direction.magnitude();
2373 if (std::abs(dir_magnitude - 1.0f) > 1e-6f) {
2374 warnings.addWarning("direction_not_normalized", "Direction vector is not normalized (magnitude = " + std::to_string(dir_magnitude) + ")");
2375 warnings.report(std::cerr);
2376 return false;
2377 }
2378
2379
2380 // Filter out invalid UUIDs
2381 std::vector<uint> valid_candidates;
2382 for (uint uuid: candidate_UUIDs) {
2383 if (context->doesPrimitiveExist(uuid)) {
2384 valid_candidates.push_back(uuid);
2385 } else {
2386 warnings.addWarning("invalid_candidate_uuid", "Skipping invalid UUID " + std::to_string(uuid));
2387 }
2388 }
2389
2390
2391 if (valid_candidates.empty()) {
2392 warnings.addWarning("no_valid_candidates", "No valid candidate UUIDs after filtering");
2393 warnings.report(std::cerr);
2394 return false;
2395 }
2396
2397 float nearest_distance_found = std::numeric_limits<float>::max();
2398 vec3 nearest_obstacle_direction;
2399 bool found_forward_surface = false;
2400
2401 // Check each candidate primitive to find the nearest "forward-facing" surface
2402 for (uint primitive_id: valid_candidates) {
2403 // Get primitive normal and a point on the surface using Context methods
2404 vec3 surface_normal = context->getPrimitiveNormal(primitive_id);
2405 std::vector<vec3> vertices = context->getPrimitiveVertices(primitive_id);
2406
2407 if (vertices.empty()) {
2408 continue; // Skip if no vertices
2409 }
2410
2411 // Use first vertex as a point on the plane
2412 vec3 point_on_plane = vertices[0];
2413
2414 // Calculate distance from origin to the plane
2415 vec3 to_origin = origin - point_on_plane;
2416 float distance_to_plane = to_origin * surface_normal;
2417
2418 // Distance is the absolute value
2419 float surface_distance = std::abs(distance_to_plane);
2420
2421 // The direction from origin to closest point on surface
2422 vec3 surface_direction;
2423 if (distance_to_plane > 0) {
2424 // Origin is on the positive side of the normal - direction to surface is -normal
2425 surface_direction = -surface_normal;
2426 } else {
2427 // Origin is on the negative side of the normal - direction to surface is +normal
2428 surface_direction = surface_normal;
2429 }
2430
2431 // Check if this surface is "in front" using dot product
2432 float dot_product = surface_direction * direction;
2433
2434 if (dot_product > 0.0f) { // Surface is in front of origin
2435 if (surface_distance < nearest_distance_found) {
2436 nearest_distance_found = surface_distance;
2437 nearest_obstacle_direction = surface_direction;
2438 found_forward_surface = true;
2439 }
2440 }
2441 }
2442
2443 if (found_forward_surface) {
2444 distance = nearest_distance_found;
2445 obstacle_direction = nearest_obstacle_direction;
2446 warnings.report(std::cerr);
2447 return true;
2448 }
2449
2450 warnings.report(std::cerr);
2451 return false;
2452}
2453
2454bool CollisionDetection::findNearestSolidObstacleInCone(const vec3 &apex, const vec3 &axis, float half_angle, float height, const std::vector<uint> &candidate_UUIDs, float &distance, vec3 &obstacle_direction, int num_rays) {
2455
2457 warnings.setEnabled(printmessages);
2458
2459 // OPTIMIZATION: Use per-tree BVH if enabled for better scaling
2460 std::vector<uint> effective_candidates;
2461 if (tree_based_bvh_enabled) {
2462 // Get tree-relevant geometry - use proportional distance for spatial filtering
2463 float spatial_filter_distance = height * 1.25f; // 25% buffer beyond cone height
2464 effective_candidates = getRelevantGeometryForTree(apex, candidate_UUIDs, spatial_filter_distance);
2465
2466 } else {
2467 effective_candidates = candidate_UUIDs;
2468 }
2469
2470 if (effective_candidates.empty()) {
2471 return false; // No obstacles within detection range - normal for outer branches
2472 }
2473
2474 // Validate input parameters
2475 if (half_angle <= 0.0f || half_angle > M_PI / 2.0f) {
2476 warnings.addWarning("invalid_half_angle", "Invalid half_angle " + std::to_string(half_angle));
2477 warnings.report(std::cerr);
2478 return false;
2479 }
2480
2481 if (height <= 0.0f) {
2482 warnings.addWarning("invalid_height", "Invalid height " + std::to_string(height));
2483 warnings.report(std::cerr);
2484 return false;
2485 }
2486
2487 // Ensure BVH is current
2488 ensureBVHCurrent();
2489
2490 // Check if BVH is empty
2491 if (bvh_nodes.empty()) {
2492 return false; // No geometry to collide with
2493 }
2494
2495 // Convert effective candidate UUIDs to set for efficient lookup
2496 std::set<uint> candidate_set(effective_candidates.begin(), effective_candidates.end());
2497
2498 // Generate ray directions within the cone
2499 std::vector<vec3> ray_directions = sampleDirectionsInCone(apex, axis, half_angle, num_rays);
2500
2501 float nearest_distance = std::numeric_limits<float>::max();
2502 vec3 nearest_direction;
2503 bool found_obstacle = false;
2504
2505 // Use modern batch ray-casting for better performance
2506 std::vector<RayQuery> ray_queries;
2507 ray_queries.reserve(ray_directions.size());
2508
2509 for (const vec3 &ray_dir: ray_directions) {
2510 ray_queries.emplace_back(apex, ray_dir, height, candidate_UUIDs);
2511 }
2512
2513 // Cast all rays in batch - automatically selects CPU/GPU based on count
2514 RayTracingStats ray_stats;
2515 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2516
2517 // Find the nearest obstacle from all ray results
2518 for (size_t i = 0; i < hit_results.size(); ++i) {
2519 const HitResult &result = hit_results[i];
2520
2521 if (result.hit && result.distance < nearest_distance) {
2522 nearest_distance = result.distance;
2523 nearest_direction = ray_directions[i];
2524 found_obstacle = true;
2525 }
2526 }
2527
2528 if (found_obstacle) {
2529 distance = nearest_distance;
2530 obstacle_direction = nearest_direction;
2531 warnings.report(std::cerr);
2532 return true;
2533 }
2534
2535 warnings.report(std::cerr);
2536 return false;
2537}
2538
2539bool CollisionDetection::findNearestSolidObstacleInCone(const vec3 &apex, const vec3 &axis, float half_angle, float height, const std::vector<uint> &candidate_UUIDs, const std::vector<uint> &plant_primitives, float &distance,
2540 vec3 &obstacle_direction, int num_rays) {
2541
2543 warnings.setEnabled(printmessages);
2544
2545 // OPTIMIZATION: Use per-tree BVH with plant primitive identification for better scaling
2546 std::vector<uint> effective_candidates;
2547 if (tree_based_bvh_enabled) {
2548 // Get tree-relevant geometry using plant primitives to identify the querying tree
2549 effective_candidates = getRelevantGeometryForTree(apex, plant_primitives, height);
2550
2551 if (printmessages && !effective_candidates.empty()) {
2552 std::cout << "Per-tree findNearestSolidObstacleInCone: Using " << effective_candidates.size() << " relevant targets instead of " << candidate_UUIDs.size() << " total targets" << std::endl;
2553 }
2554 } else {
2555 effective_candidates = candidate_UUIDs;
2556 }
2557
2558 if (effective_candidates.empty()) {
2559 return false; // No obstacles within detection range - normal for outer branches
2560 }
2561
2562 // Validate input parameters
2563 if (half_angle <= 0.0f || half_angle > M_PI / 2.0f) {
2564 warnings.addWarning("invalid_half_angle", "Invalid half_angle " + std::to_string(half_angle));
2565 warnings.report(std::cerr);
2566 return false;
2567 }
2568
2569 if (height <= 0.0f) {
2570 warnings.addWarning("invalid_height", "Invalid height " + std::to_string(height));
2571 warnings.report(std::cerr);
2572 return false;
2573 }
2574
2575 // Ensure BVH is current
2576 ensureBVHCurrent();
2577
2578 // Check if BVH is empty
2579 if (bvh_nodes.empty()) {
2580 return false; // No geometry to collide with
2581 }
2582
2583 // Convert effective candidate UUIDs to set for efficient lookup
2584 std::set<uint> candidate_set(effective_candidates.begin(), effective_candidates.end());
2585
2586 // Generate ray directions within the cone
2587 std::vector<vec3> ray_directions = sampleDirectionsInCone(apex, axis, half_angle, num_rays);
2588
2589 float nearest_distance = std::numeric_limits<float>::max();
2590 vec3 nearest_direction;
2591 bool found_obstacle = false;
2592
2593 // Use modern batch ray-casting for better performance
2594 std::vector<RayQuery> ray_queries;
2595 ray_queries.reserve(ray_directions.size());
2596
2597 for (const vec3 &ray_dir: ray_directions) {
2598 ray_queries.emplace_back(apex, ray_dir, height, effective_candidates);
2599 }
2600
2601 // Cast all rays in batch - automatically selects CPU/GPU based on count
2602 RayTracingStats ray_stats;
2603 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2604
2605 // Find the nearest obstacle from all ray results
2606 for (size_t i = 0; i < hit_results.size(); ++i) {
2607 const HitResult &result = hit_results[i];
2608
2609 if (result.hit && result.distance < nearest_distance) {
2610 nearest_distance = result.distance;
2611 nearest_direction = ray_directions[i];
2612 found_obstacle = true;
2613 }
2614 }
2615
2616 if (found_obstacle) {
2617 distance = nearest_distance;
2618 obstacle_direction = nearest_direction;
2619 warnings.report(std::cerr);
2620 return true;
2621 }
2622
2623 warnings.report(std::cerr);
2624 return false;
2625}
2626
2627
2628std::vector<helios::vec3> CollisionDetection::sampleDirectionsInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, int num_samples) {
2629
2630 std::vector<vec3> directions;
2631 directions.reserve(num_samples);
2632
2633 if (num_samples <= 0 || half_angle <= 0.0f) {
2634 return directions;
2635 }
2636
2637 // Normalize the central axis
2638 vec3 axis = central_axis;
2639 axis.normalize();
2640
2641 // Create an orthonormal basis with the central axis as the primary axis
2642 vec3 u, v;
2643 if (std::abs(axis.z) < 0.9f) {
2644 u = cross(axis, make_vec3(0, 0, 1));
2645 } else {
2646 u = cross(axis, make_vec3(1, 0, 0));
2647 }
2648 u.normalize();
2649 v = cross(axis, u);
2650 v.normalize();
2651
2652 // Generate uniform samples within the cone using rejection sampling on hemisphere
2653 std::random_device rd;
2654 std::mt19937 gen(rd());
2655 std::uniform_real_distribution<float> uniform_dist(0.0f, 1.0f);
2656
2657 int samples_generated = 0;
2658 int max_attempts = num_samples * 10; // Limit attempts to prevent infinite loops
2659 int attempts = 0;
2660
2661 while (samples_generated < num_samples && attempts < max_attempts) {
2662 attempts++;
2663
2664 // Generate uniform sample on unit hemisphere using spherical coordinates
2665 float u1 = uniform_dist(gen);
2666 float u2 = uniform_dist(gen);
2667
2668 // Use stratified sampling for better distribution
2669 if (samples_generated > 0) {
2670 float stratum_u1 = (float) samples_generated / (float) num_samples;
2671 float stratum_u2 = uniform_dist(gen);
2672 u1 = (stratum_u1 + u1 / (float) num_samples);
2673 if (u1 > 1.0f)
2674 u1 -= 1.0f;
2675 }
2676
2677 // Convert to spherical coordinates
2678 // For uniform sampling within cone, we need:
2679 // cos(theta) uniformly distributed between cos(half_angle) and 1
2680 float cos_half_angle = cosf(half_angle);
2681 float cos_theta = cos_half_angle + u1 * (1.0f - cos_half_angle);
2682 float sin_theta = sqrtf(1.0f - cos_theta * cos_theta);
2683 float phi = 2.0f * M_PI * u2;
2684
2685 // Convert to Cartesian coordinates in local coordinate system
2686 float x = sin_theta * cosf(phi);
2687 float y = sin_theta * sinf(phi);
2688 float z = cos_theta;
2689
2690 // Transform from local coordinates to world coordinates
2691 vec3 local_direction = make_vec3(x, y, z);
2692 vec3 world_direction = u * local_direction.x + v * local_direction.y + axis * local_direction.z;
2693 world_direction.normalize();
2694
2695 // Verify the direction is within the cone (numerical precision check)
2696 float dot_product = world_direction * axis;
2697 if (dot_product >= cos_half_angle - 1e-6f) {
2698 directions.push_back(world_direction);
2699 samples_generated++;
2700 }
2701 }
2702
2703 // If we couldn't generate enough samples, fill with the central axis
2704 while (directions.size() < (size_t) num_samples) {
2705 directions.push_back(axis);
2706 }
2707
2708 return directions;
2709}
2710
2711
2712// -------- SPATIAL GRID OPTIMIZATION --------
2713
2714std::vector<uint> CollisionDetection::getCandidatesUsingSpatialGrid(const Cone &cone, const vec3 &apex, const vec3 &central_axis, float half_angle, float height) {
2715 // Get cone AABB for grid cell determination
2716 vec3 cone_base = apex + central_axis * height;
2717 float cone_base_radius = height * tan(half_angle);
2718
2719 vec3 cone_aabb_min = vec3(std::min(apex.x, cone_base.x - cone_base_radius), std::min(apex.y, cone_base.y - cone_base_radius), std::min(apex.z, cone_base.z - cone_base_radius));
2720 vec3 cone_aabb_max = vec3(std::max(apex.x, cone_base.x + cone_base_radius), std::max(apex.y, cone_base.y + cone_base_radius), std::max(apex.z, cone_base.z + cone_base_radius));
2721
2722 // Use BVH primitive indices approach for better performance
2723 std::vector<uint> all_primitives;
2724 if (!primitive_indices.empty()) {
2725 all_primitives = primitive_indices; // Use BVH primitive indices
2726 } else {
2727 // Fallback: get all primitives from context (slower)
2728 all_primitives = context->getAllUUIDs();
2729 }
2730
2731 // Use existing filterPrimitivesParallel method with cone object
2732 return filterPrimitivesParallel(cone, all_primitives);
2733}
2734
2735// -------- HYBRID BVH + RASTERIZATION METHODS --------
2736
2737std::vector<uint> CollisionDetection::getCandidatePrimitivesInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, float height) {
2738 std::vector<uint> candidates;
2739
2740 // Create cone object for filtering
2741 Cone cone{apex, central_axis, half_angle, height};
2742
2743 // OPTIMIZATION: Use spatial grid to avoid O(N) scaling with tree count
2744 candidates = getCandidatesUsingSpatialGrid(cone, apex, central_axis, half_angle, height);
2745
2746 return candidates;
2747}
2748
2749
2750// -------- GAP DETECTION IMPLEMENTATION --------
2751
2752std::vector<CollisionDetection::Gap> CollisionDetection::detectGapsInCone(const vec3 &apex, const vec3 &central_axis, float half_angle, float height, int num_samples) {
2753
2754 std::vector<Gap> gaps;
2755
2756 // Generate dense ray samples within the cone
2757 std::vector<vec3> sample_directions = sampleDirectionsInCone(apex, central_axis, half_angle, num_samples);
2758
2759 if (sample_directions.empty()) {
2760 return gaps;
2761 }
2762
2763 // Build ray sample data using modern ray-tracing API
2764 std::vector<RaySample> ray_samples;
2765 ray_samples.reserve(sample_directions.size());
2766
2767 float max_distance = (height > 0.0f) ? height : -1.0f;
2768
2769 // Use batch ray casting for better performance
2770 std::vector<RayQuery> ray_queries;
2771 ray_queries.reserve(sample_directions.size());
2772
2773 for (const vec3 &direction: sample_directions) {
2774 ray_queries.emplace_back(apex, direction, max_distance);
2775 }
2776
2777 // Cast all rays in batch - automatically selects CPU/GPU based on count
2778 RayTracingStats ray_stats;
2779 std::vector<HitResult> hit_results = castRays(ray_queries, &ray_stats);
2780
2781 // Process results to build ray samples
2782 for (size_t i = 0; i < hit_results.size(); ++i) {
2783 RaySample sample;
2784 sample.direction = sample_directions[i];
2785
2786 if (hit_results[i].hit) {
2787 sample.distance = hit_results[i].distance;
2788 sample.is_free = false;
2789 } else {
2790 sample.distance = (max_distance > 0.0f) ? max_distance : 1000.0f;
2791 sample.is_free = true;
2792 }
2793
2794 ray_samples.push_back(sample);
2795 }
2796
2797
2798 // Use a more sophisticated gap detection approach based on contiguous free regions
2799 // First, sort samples by angular position relative to central axis to identify contiguous regions
2800 std::vector<std::pair<float, size_t>> angular_positions;
2801 for (size_t i = 0; i < ray_samples.size(); ++i) {
2802 if (ray_samples[i].is_free) {
2803 // Calculate angular position in cone-relative coordinates
2804 float dot_product = ray_samples[i].direction * central_axis;
2805 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2806 float angular_from_center = acosf(dot_product);
2807 angular_positions.push_back({angular_from_center, i});
2808 }
2809 }
2810
2811 if (angular_positions.empty()) {
2812 return gaps; // No free samples
2813 }
2814
2815 // Sort by angular position
2816 std::sort(angular_positions.begin(), angular_positions.end());
2817
2818 // Find contiguous free regions (gaps) with minimum size threshold
2819 std::vector<bool> processed(ray_samples.size(), false);
2820 float min_gap_angular_size = half_angle * 0.05f; // 5% of cone angle minimum gap size
2821
2822 for (size_t start = 0; start < angular_positions.size(); ++start) {
2823 size_t start_idx = angular_positions[start].second;
2824 if (processed[start_idx])
2825 continue;
2826
2827 Gap new_gap;
2828 new_gap.sample_indices.push_back(start_idx);
2829 processed[start_idx] = true;
2830
2831 // Extend gap by finding nearby free samples using k-nearest neighbor approach
2832 std::vector<float> distances_to_start;
2833 for (size_t j = 0; j < ray_samples.size(); ++j) {
2834 if (j != start_idx && ray_samples[j].is_free && !processed[j]) {
2835 float dot_product = ray_samples[start_idx].direction * ray_samples[j].direction;
2836 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2837 float angular_distance = acosf(dot_product);
2838 distances_to_start.push_back(angular_distance);
2839 } else {
2840 distances_to_start.push_back(999.0f); // Large value for excluded samples
2841 }
2842 }
2843
2844 // Add nearby samples to gap using adaptive threshold
2845 float sample_density = 2.0f * half_angle / sqrtf((float) num_samples);
2846 float adaptive_threshold = sample_density * 3.0f; // 3x sample spacing for connection
2847
2848 for (size_t j = 0; j < ray_samples.size(); ++j) {
2849 if (j != start_idx && ray_samples[j].is_free && !processed[j] && distances_to_start[j] < adaptive_threshold) {
2850 new_gap.sample_indices.push_back(j);
2851 processed[j] = true;
2852 }
2853 }
2854
2855 // Only keep gaps that meet minimum size requirements
2856 if (new_gap.sample_indices.size() >= 5) { // Require at least 5 samples for a valid gap
2857 gaps.push_back(new_gap);
2858 }
2859 }
2860
2861 // Calculate gap properties
2862 for (Gap &gap: gaps) {
2863 // Calculate gap center direction (average of constituent directions)
2864 vec3 center(0, 0, 0);
2865 for (int idx: gap.sample_indices) {
2866 center = center + ray_samples[idx].direction;
2867 }
2868 center = center / (float) gap.sample_indices.size();
2869 center.normalize();
2870 gap.center_direction = center;
2871
2872 // Calculate angular size
2873 std::vector<RaySample> gap_samples;
2874 for (int idx: gap.sample_indices) {
2875 gap_samples.push_back(ray_samples[idx]);
2876 }
2877 gap.angular_size = calculateGapAngularSize(gap_samples, central_axis);
2878
2879 // Calculate angular distance from central axis
2880 float dot_product = gap.center_direction * central_axis;
2881 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2882 gap.angular_distance = acosf(dot_product);
2883 }
2884
2885 // OPTIMIZATION: Spatial filtering - remove gaps that are too far from central axis
2886 // This reduces the number of gaps passed to the expensive scoring function
2887 if (gaps.size() > 10) {
2888 float max_angular_distance = half_angle * 0.8f; // Only consider gaps within 80% of cone angle
2889
2890 auto it = std::remove_if(gaps.begin(), gaps.end(), [max_angular_distance](const Gap &gap) { return gap.angular_distance > max_angular_distance; });
2891
2892 gaps.erase(it, gaps.end());
2893
2894 // If filtering removed too many gaps, keep at least the closest ones
2895 if (gaps.size() < 3 && gaps.size() > 0) {
2896 // Sort by angular distance and keep the closest ones
2897 std::partial_sort(gaps.begin(), gaps.begin() + std::min(size_t(3), gaps.size()), gaps.end(), [](const Gap &a, const Gap &b) { return a.angular_distance < b.angular_distance; });
2898 }
2899 }
2900
2901 return gaps;
2902}
2903
2904
2905float CollisionDetection::calculateGapAngularSize(const std::vector<RaySample> &gap_samples, const vec3 &central_axis) {
2906
2907 if (gap_samples.empty()) {
2908 return 0.0f;
2909 }
2910
2911 // Find the angular extent of the gap by finding min/max angles
2912 float min_angle = M_PI;
2913 float max_angle = 0.0f;
2914
2915 for (const RaySample &sample: gap_samples) {
2916 float dot_product = sample.direction * central_axis;
2917 dot_product = std::max(-1.0f, std::min(1.0f, dot_product));
2918 float angle = acosf(dot_product);
2919
2920 min_angle = std::min(min_angle, angle);
2921 max_angle = std::max(max_angle, angle);
2922 }
2923
2924 // Simple approximation: angular size as solid angle
2925 float angular_width = max_angle - min_angle;
2926
2927 // Convert to approximate solid angle (steradians)
2928 // This is a rough approximation: solid_angle ≈ π * (angular_width)^2
2929 float solid_angle = M_PI * angular_width * angular_width;
2930
2931 return solid_angle;
2932}
2933
2934void CollisionDetection::scoreGapsByFishEyeMetric(std::vector<Gap> &gaps, const vec3 &central_axis) {
2935
2936 // Early exit for small gap counts - full sort is fine
2937 if (gaps.size() <= 10) {
2938 for (Gap &gap: gaps) {
2939 // Fish-eye metric: prefer larger gaps closer to central axis
2940 // Gap size component (logarithmic scaling for larger gaps)
2941 float size_score = log(1.0f + gap.angular_size * 100.0f); // Scale up angular size
2942 // Distance penalty (exponential penalty for gaps far from center)
2943 float distance_penalty = exp(gap.angular_distance * 2.0f);
2944 // Combined score (higher is better)
2945 gap.score = size_score / distance_penalty;
2946 }
2947 // Sort gaps by score (highest first)
2948 std::sort(gaps.begin(), gaps.end(), [](const Gap &a, const Gap &b) { return a.score > b.score; });
2949 return;
2950 }
2951
2952 // OPTIMIZATION: For large gap counts, use partial sorting
2953 // We only need the top 3-5 gaps for collision avoidance
2954 const size_t max_gaps_needed = std::min(size_t(5), gaps.size());
2955
2956 // Calculate scores for all gaps
2957 for (Gap &gap: gaps) {
2958 // Fish-eye metric: prefer larger gaps closer to central axis
2959
2960 // Gap size component (logarithmic scaling for larger gaps)
2961 float size_score = log(1.0f + gap.angular_size * 100.0f); // Scale up angular size
2962
2963 // Distance penalty (exponential penalty for gaps far from center)
2964 float distance_penalty = exp(gap.angular_distance * 2.0f);
2965
2966 // Combined score (higher is better)
2967 gap.score = size_score / distance_penalty;
2968 }
2969
2970 // Use partial_sort to get only the top N gaps - O(n log k) instead of O(n log n)
2971 std::partial_sort(gaps.begin(), gaps.begin() + max_gaps_needed, gaps.end(), [](const Gap &a, const Gap &b) { return a.score > b.score; });
2972
2973 // Resize to keep only the top gaps to avoid processing unnecessary gaps later
2974 gaps.resize(max_gaps_needed);
2975}
2976
2977helios::vec3 CollisionDetection::findOptimalGapDirection(const std::vector<Gap> &gaps, const vec3 &central_axis) {
2978
2979 if (gaps.empty()) {
2980 // No gaps found, return central axis
2981 vec3 result = central_axis;
2982 result.normalize();
2983 return result;
2984 }
2985
2986 // Return direction toward the highest-scoring gap
2987 const Gap &best_gap = gaps[0];
2988 return best_gap.center_direction;
2989}
2990
2991// -------- SPATIAL OPTIMIZATION METHODS --------
2992
2993std::vector<std::pair<uint, uint>> CollisionDetection::findCollisionsWithinDistance(const std::vector<uint> &query_UUIDs, const std::vector<uint> &target_UUIDs, float max_distance) {
2994
2995 std::vector<std::pair<uint, uint>> collision_pairs;
2996
2997
2998 // Update primitive centroids cache for target geometry
2999 for (uint target_id: target_UUIDs) {
3000 if (primitive_centroids_cache.find(target_id) == primitive_centroids_cache.end()) {
3001 // Calculate and cache centroid for this primitive
3002 std::vector<vec3> vertices = context->getPrimitiveVertices(target_id);
3003 if (!vertices.empty()) {
3004 vec3 centroid = make_vec3(0, 0, 0);
3005 for (const vec3 &vertex: vertices) {
3006 centroid = centroid + vertex;
3007 }
3008 centroid = centroid / float(vertices.size());
3009 primitive_centroids_cache[target_id] = centroid;
3010 }
3011 }
3012 }
3013
3014 // For each query primitive, find nearby targets within distance
3015 for (uint query_id: query_UUIDs) {
3016 // Get query centroid
3017 std::vector<vec3> query_vertices = context->getPrimitiveVertices(query_id);
3018 if (query_vertices.empty())
3019 continue;
3020
3021 vec3 query_centroid = make_vec3(0, 0, 0);
3022 for (const vec3 &vertex: query_vertices) {
3023 query_centroid = query_centroid + vertex;
3024 }
3025 query_centroid = query_centroid / float(query_vertices.size());
3026
3027 // Check distance to each target
3028 for (uint target_id: target_UUIDs) {
3029 if (query_id == target_id)
3030 continue; // Skip self-collision
3031
3032 auto target_centroid_it = primitive_centroids_cache.find(target_id);
3033 if (target_centroid_it != primitive_centroids_cache.end()) {
3034 vec3 target_centroid = target_centroid_it->second;
3035 float distance = (query_centroid - target_centroid).magnitude();
3036
3037 if (distance <= max_distance) {
3038 // Within distance threshold, check for actual collision
3039 std::vector<uint> single_query = {query_id};
3040 std::vector<uint> single_target = {target_id};
3041 std::vector<uint> empty_objects;
3042
3043 std::vector<uint> collisions = findCollisions(single_query, empty_objects, single_target, empty_objects);
3044 if (!collisions.empty()) {
3045 collision_pairs.push_back(std::make_pair(query_id, target_id));
3046 }
3047 }
3048 }
3049 }
3050 }
3051
3052 return collision_pairs;
3053}
3054
3056 if (distance <= 0.0f) {
3057 helios_runtime_error("ERROR (CollisionDetection::setMaxCollisionDistance): Distance must be positive");
3058 }
3059
3060 max_collision_distance = distance;
3061}
3062
3064 return max_collision_distance;
3065}
3066
3067std::vector<uint> CollisionDetection::filterGeometryByDistance(const helios::vec3 &query_center, float max_radius, const std::vector<uint> &candidate_UUIDs) {
3068
3069 std::vector<uint> filtered_UUIDs;
3070
3071 // Get list of candidates (either provided or all primitives)
3072 std::vector<uint> candidates;
3073 if (candidate_UUIDs.empty()) {
3074 candidates = context->getAllUUIDs();
3075 } else {
3076 candidates = candidate_UUIDs;
3077 }
3078
3079 // Filter candidates by distance
3080 for (uint candidate_id: candidates) {
3081 // Skip if primitive doesn't exist
3082 if (!context->doesPrimitiveExist(candidate_id)) {
3083 continue;
3084 }
3085
3086 // Get primitive centroid (calculate if not cached)
3087 vec3 centroid;
3088 auto cache_it = primitive_centroids_cache.find(candidate_id);
3089 if (cache_it != primitive_centroids_cache.end()) {
3090 centroid = cache_it->second;
3091 } else {
3092 // Calculate and cache centroid
3093 std::vector<vec3> vertices = context->getPrimitiveVertices(candidate_id);
3094 if (vertices.empty())
3095 continue;
3096
3097 centroid = make_vec3(0, 0, 0);
3098 for (const vec3 &vertex: vertices) {
3099 centroid = centroid + vertex;
3100 }
3101 centroid = centroid / float(vertices.size());
3102 primitive_centroids_cache[candidate_id] = centroid;
3103 }
3104
3105 // Check distance from query center
3106 float distance = (query_center - centroid).magnitude();
3107 if (distance <= max_radius) {
3108 filtered_UUIDs.push_back(candidate_id);
3109 }
3110 }
3111
3112 return filtered_UUIDs;
3113}
3114
3115// -------- VOXEL RAY PATH LENGTH CALCULATIONS --------
3116
3117void CollisionDetection::calculateVoxelRayPathLengths(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions, const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3118
3120 warnings.setEnabled(printmessages);
3121
3122 if (ray_origins.size() != ray_directions.size()) {
3123 helios_runtime_error("ERROR (CollisionDetection::calculateVoxelRayPathLengths): ray_origins and ray_directions vectors must have same size");
3124 }
3125
3126 if (ray_origins.empty()) {
3127 warnings.addWarning("no_rays_provided", "No rays provided");
3128 warnings.report(std::cerr);
3129 return;
3130 }
3131
3132 // Initialize voxel data structures for the given grid
3133 initializeVoxelData(grid_center, grid_size, grid_divisions);
3134
3135 // Ensure BVH and primitive cache are built before parallel section
3136 // This prevents thread-safety issues when multiple threads try to build them
3137 ensureBVHCurrent();
3138 if (primitive_cache.empty()) {
3139 buildPrimitiveCache();
3140 }
3141
3142 // Choose GPU or CPU implementation based on acceleration setting
3143#ifdef HELIOS_CUDA_AVAILABLE
3145 // Try GPU first, but fall back to CPU if GPU fails (no hardware, out of memory, etc.)
3146 bool gpu_success = calculateVoxelRayPathLengths_GPU(ray_origins, ray_directions);
3147 if (!gpu_success) {
3148 // GPU failed - fall back to CPU and disable GPU for future calls
3149 if (printmessages) {
3150 warnings.addWarning("gpu_voxel_fallback", "GPU voxel calculation failed, falling back to CPU");
3151 }
3152 gpu_acceleration_enabled = false;
3153 calculateVoxelRayPathLengths_CPU(ray_origins, ray_directions);
3154 }
3155 } else {
3156 calculateVoxelRayPathLengths_CPU(ray_origins, ray_directions);
3157 }
3158#else
3159 calculateVoxelRayPathLengths_CPU(ray_origins, ray_directions);
3160#endif
3161
3162 warnings.report(std::cerr);
3163}
3164
3165void CollisionDetection::setVoxelTransmissionProbability(int P_denom, int P_trans, const helios::int3 &ijk) {
3166 if (!validateVoxelIndices(ijk)) {
3167 helios_runtime_error("ERROR (CollisionDetection::setVoxelTransmissionProbability): Invalid voxel indices");
3168 }
3169
3170 if (!voxel_data_initialized) {
3171 helios_runtime_error("ERROR (CollisionDetection::setVoxelTransmissionProbability): Voxel data not initialized. Call calculateVoxelRayPathLengths first.");
3172 }
3173
3174 if (use_flat_arrays) {
3175 size_t flat_idx = flatIndex(ijk);
3176 voxel_ray_counts_flat[flat_idx] = P_denom;
3177 voxel_transmitted_flat[flat_idx] = P_trans;
3178 } else {
3179 voxel_ray_counts[ijk.x][ijk.y][ijk.z] = P_denom;
3180 voxel_transmitted[ijk.x][ijk.y][ijk.z] = P_trans;
3181 }
3182}
3183
3184void CollisionDetection::getVoxelTransmissionProbability(const helios::int3 &ijk, int &P_denom, int &P_trans) const {
3185 if (!validateVoxelIndices(ijk)) {
3186 helios_runtime_error("ERROR (CollisionDetection::getVoxelTransmissionProbability): Invalid voxel indices");
3187 }
3188
3189 if (!voxel_data_initialized) {
3190 P_denom = 0;
3191 P_trans = 0;
3192 return;
3193 }
3194
3195 if (use_flat_arrays) {
3196 size_t flat_idx = flatIndex(ijk);
3197 P_denom = voxel_ray_counts_flat[flat_idx];
3198 P_trans = voxel_transmitted_flat[flat_idx];
3199 } else {
3200 P_denom = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3201 P_trans = voxel_transmitted[ijk.x][ijk.y][ijk.z];
3202 }
3203}
3204
3205void CollisionDetection::setVoxelRbar(float r_bar, const helios::int3 &ijk) {
3206 if (!validateVoxelIndices(ijk)) {
3207 helios_runtime_error("ERROR (CollisionDetection::setVoxelRbar): Invalid voxel indices");
3208 }
3209
3210 if (!voxel_data_initialized) {
3211 helios_runtime_error("ERROR (CollisionDetection::setVoxelRbar): Voxel data not initialized. Call calculateVoxelRayPathLengths first.");
3212 }
3213
3214 if (use_flat_arrays) {
3215 size_t flat_idx = flatIndex(ijk);
3216 // Store r_bar * ray_count so that getVoxelRbar returns the correct value when it divides
3217 int ray_count = voxel_ray_counts_flat[flat_idx];
3218 if (ray_count == 0) {
3219 ray_count = 1; // Set to 1 to avoid division by zero
3220 voxel_ray_counts_flat[flat_idx] = 1;
3221 }
3222 voxel_path_lengths_flat[flat_idx] = r_bar * static_cast<float>(ray_count);
3223 } else {
3224 // Store r_bar * ray_count so that getVoxelRbar returns the correct value when it divides
3225 int ray_count = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3226 if (ray_count == 0) {
3227 ray_count = 1; // Set to 1 to avoid division by zero
3228 voxel_ray_counts[ijk.x][ijk.y][ijk.z] = 1;
3229 }
3230 voxel_path_lengths[ijk.x][ijk.y][ijk.z] = r_bar * static_cast<float>(ray_count);
3231 }
3232}
3233
3235 if (!validateVoxelIndices(ijk)) {
3236 helios_runtime_error("ERROR (CollisionDetection::getVoxelRbar): Invalid voxel indices");
3237 }
3238
3239 if (!voxel_data_initialized) {
3240 return 0.0f;
3241 }
3242
3243 if (use_flat_arrays) {
3244 size_t flat_idx = flatIndex(ijk);
3245 int ray_count = voxel_ray_counts_flat[flat_idx];
3246 if (ray_count == 0) {
3247 return 0.0f;
3248 }
3249 // If this was set directly via setVoxelRbar, return it as-is
3250 // If this accumulated from ray calculations, compute the average
3251 return voxel_path_lengths_flat[flat_idx] / static_cast<float>(ray_count);
3252 } else {
3253 int ray_count = voxel_ray_counts[ijk.x][ijk.y][ijk.z];
3254 if (ray_count == 0) {
3255 return 0.0f;
3256 }
3257 // If this was set directly via setVoxelRbar, return it as-is
3258 // If this accumulated from ray calculations, compute the average
3259 return voxel_path_lengths[ijk.x][ijk.y][ijk.z] / static_cast<float>(ray_count);
3260 }
3261}
3262
3263void CollisionDetection::getVoxelRayHitCounts(const helios::int3 &ijk, int &hit_before, int &hit_after, int &hit_inside) const {
3264 if (!validateVoxelIndices(ijk)) {
3265 helios_runtime_error("ERROR (CollisionDetection::getVoxelRayHitCounts): Invalid voxel indices");
3266 }
3267
3268 if (!voxel_data_initialized) {
3269 hit_before = 0;
3270 hit_after = 0;
3271 hit_inside = 0;
3272 return;
3273 }
3274
3275 if (use_flat_arrays) {
3276 size_t flat_idx = flatIndex(ijk);
3277 hit_before = voxel_hit_before_flat[flat_idx];
3278 hit_after = voxel_hit_after_flat[flat_idx];
3279 hit_inside = voxel_hit_inside_flat[flat_idx];
3280 } else {
3281 hit_before = voxel_hit_before[ijk.x][ijk.y][ijk.z];
3282 hit_after = voxel_hit_after[ijk.x][ijk.y][ijk.z];
3283 hit_inside = voxel_hit_inside[ijk.x][ijk.y][ijk.z];
3284 }
3285}
3286
3287std::vector<float> CollisionDetection::getVoxelRayPathLengths(const helios::int3 &ijk) const {
3288 if (!validateVoxelIndices(ijk)) {
3289 helios_runtime_error("ERROR (CollisionDetection::getVoxelRayPathLengths): Invalid voxel indices");
3290 }
3291
3292 if (!voxel_data_initialized) {
3293 return std::vector<float>();
3294 }
3295
3296 if (use_flat_arrays) {
3297 // Use flat array structure with offsets
3298 size_t flat_idx = flatIndex(ijk);
3299
3300 // Bounds checking for flat array access
3301 if (flat_idx >= voxel_individual_path_offsets.size() || flat_idx >= voxel_individual_path_counts.size()) {
3302 return std::vector<float>();
3303 }
3304
3305 size_t offset = voxel_individual_path_offsets[flat_idx];
3306 size_t count = voxel_individual_path_counts[flat_idx];
3307
3308 // Additional bounds checking for the data array
3309 if (count == 0 || offset + count > voxel_individual_path_lengths_flat.size()) {
3310 return std::vector<float>();
3311 }
3312
3313 std::vector<float> result;
3314 result.reserve(count);
3315
3316 for (size_t i = 0; i < count; ++i) {
3317 result.push_back(voxel_individual_path_lengths_flat[offset + i]);
3318 }
3319
3320 return result;
3321 } else {
3322 return voxel_individual_path_lengths[ijk.x][ijk.y][ijk.z];
3323 }
3324}
3325
3327 voxel_ray_counts.clear();
3328 voxel_transmitted.clear();
3329 voxel_path_lengths.clear();
3330 voxel_hit_before.clear();
3331 voxel_hit_after.clear();
3332 voxel_hit_inside.clear();
3333 voxel_individual_path_lengths.clear();
3334 voxel_data_initialized = false;
3335
3336 if (printmessages) {
3337 std::cout << "Voxel data cleared." << std::endl;
3338 }
3339}
3340
3341// -------- VOXEL RAY PATH LENGTH HELPER METHODS --------
3342
3343void CollisionDetection::initializeVoxelData(const vec3 &grid_center, const vec3 &grid_size, const helios::int3 &grid_divisions) {
3344
3345 // Check if we need to reinitialize (grid parameters changed)
3346 bool need_reinit = !voxel_data_initialized || (grid_center - voxel_grid_center).magnitude() > 1e-6 || (grid_size - voxel_grid_size).magnitude() > 1e-6 || grid_divisions.x != voxel_grid_divisions.x || grid_divisions.y != voxel_grid_divisions.y ||
3347 grid_divisions.z != voxel_grid_divisions.z;
3348
3349 if (!need_reinit) {
3350 // Just clear existing data but keep structure
3351 if (use_flat_arrays) {
3352 // Clear flat arrays
3353 size_t total_voxels = static_cast<size_t>(grid_divisions.x) * grid_divisions.y * grid_divisions.z;
3354 std::fill(voxel_ray_counts_flat.begin(), voxel_ray_counts_flat.end(), 0);
3355 std::fill(voxel_transmitted_flat.begin(), voxel_transmitted_flat.end(), 0);
3356 std::fill(voxel_path_lengths_flat.begin(), voxel_path_lengths_flat.end(), 0.0f);
3357 std::fill(voxel_hit_before_flat.begin(), voxel_hit_before_flat.end(), 0);
3358 std::fill(voxel_hit_after_flat.begin(), voxel_hit_after_flat.end(), 0);
3359 std::fill(voxel_hit_inside_flat.begin(), voxel_hit_inside_flat.end(), 0);
3360
3361 // Clear individual path lengths
3362 voxel_individual_path_lengths_flat.clear();
3363 std::fill(voxel_individual_path_offsets.begin(), voxel_individual_path_offsets.end(), 0);
3364 std::fill(voxel_individual_path_counts.begin(), voxel_individual_path_counts.end(), 0);
3365 } else {
3366 // Clear nested vectors
3367 for (int i = 0; i < grid_divisions.x; i++) {
3368 for (int j = 0; j < grid_divisions.y; j++) {
3369 for (int k = 0; k < grid_divisions.z; k++) {
3370 voxel_ray_counts[i][j][k] = 0;
3371 voxel_transmitted[i][j][k] = 0;
3372 voxel_path_lengths[i][j][k] = 0.0f;
3373 voxel_hit_before[i][j][k] = 0;
3374 voxel_hit_after[i][j][k] = 0;
3375 voxel_hit_inside[i][j][k] = 0;
3376 voxel_individual_path_lengths[i][j][k].clear();
3377 }
3378 }
3379 }
3380 }
3381 return;
3382 }
3383
3384 // Store grid parameters
3385 voxel_grid_center = grid_center;
3386 voxel_grid_size = grid_size;
3387 voxel_grid_divisions = grid_divisions;
3388
3389 // Enable flat arrays for better performance
3390 use_flat_arrays = true;
3391
3392 if (use_flat_arrays) {
3393 // Initialize optimized flat arrays (Structure-of-Arrays)
3394 size_t total_voxels = static_cast<size_t>(grid_divisions.x) * grid_divisions.y * grid_divisions.z;
3395
3396 voxel_ray_counts_flat.assign(total_voxels, 0);
3397 voxel_transmitted_flat.assign(total_voxels, 0);
3398 voxel_path_lengths_flat.assign(total_voxels, 0.0f);
3399 voxel_hit_before_flat.assign(total_voxels, 0);
3400 voxel_hit_after_flat.assign(total_voxels, 0);
3401 voxel_hit_inside_flat.assign(total_voxels, 0);
3402
3403 // Individual path lengths with dynamic storage
3404 voxel_individual_path_lengths_flat.clear();
3405 voxel_individual_path_offsets.assign(total_voxels, 0);
3406 voxel_individual_path_counts.assign(total_voxels, 0);
3407
3408 // Reserve reasonable initial capacity for individual paths
3409 voxel_individual_path_lengths_flat.reserve(total_voxels * 10); // Average 10 paths per voxel
3410
3411 } else {
3412 // Fallback to nested vectors for compatibility
3413 voxel_ray_counts.resize(grid_divisions.x);
3414 voxel_transmitted.resize(grid_divisions.x);
3415 voxel_path_lengths.resize(grid_divisions.x);
3416 voxel_hit_before.resize(grid_divisions.x);
3417 voxel_hit_after.resize(grid_divisions.x);
3418 voxel_hit_inside.resize(grid_divisions.x);
3419 voxel_individual_path_lengths.resize(grid_divisions.x);
3420
3421 for (int i = 0; i < grid_divisions.x; i++) {
3422 voxel_ray_counts[i].resize(grid_divisions.y);
3423 voxel_transmitted[i].resize(grid_divisions.y);
3424 voxel_path_lengths[i].resize(grid_divisions.y);
3425 voxel_hit_before[i].resize(grid_divisions.y);
3426 voxel_hit_after[i].resize(grid_divisions.y);
3427 voxel_hit_inside[i].resize(grid_divisions.y);
3428 voxel_individual_path_lengths[i].resize(grid_divisions.y);
3429
3430 for (int j = 0; j < grid_divisions.y; j++) {
3431 voxel_ray_counts[i][j].resize(grid_divisions.z, 0);
3432 voxel_transmitted[i][j].resize(grid_divisions.z, 0);
3433 voxel_path_lengths[i][j].resize(grid_divisions.z, 0.0f);
3434 voxel_hit_before[i][j].resize(grid_divisions.z, 0);
3435 voxel_hit_after[i][j].resize(grid_divisions.z, 0);
3436 voxel_hit_inside[i][j].resize(grid_divisions.z, 0);
3437 voxel_individual_path_lengths[i][j].resize(grid_divisions.z);
3438 }
3439 }
3440 }
3441
3442 voxel_data_initialized = true;
3443}
3444
3445bool CollisionDetection::validateVoxelIndices(const helios::int3 &ijk) const {
3446 return (ijk.x >= 0 && ijk.x < voxel_grid_divisions.x && ijk.y >= 0 && ijk.y < voxel_grid_divisions.y && ijk.z >= 0 && ijk.z < voxel_grid_divisions.z);
3447}
3448
3449void CollisionDetection::calculateVoxelAABB(const helios::int3 &ijk, vec3 &voxel_min, vec3 &voxel_max) const {
3450 vec3 voxel_size = make_vec3(voxel_grid_size.x / static_cast<float>(voxel_grid_divisions.x), voxel_grid_size.y / static_cast<float>(voxel_grid_divisions.y), voxel_grid_size.z / static_cast<float>(voxel_grid_divisions.z));
3451
3452 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3453
3454 voxel_min = grid_min + make_vec3(static_cast<float>(ijk.x) * voxel_size.x, static_cast<float>(ijk.y) * voxel_size.y, static_cast<float>(ijk.z) * voxel_size.z);
3455
3456 voxel_max = voxel_min + voxel_size;
3457}
3458
3459std::vector<std::pair<helios::int3, float>> CollisionDetection::traverseVoxelGrid(const vec3 &ray_origin, const vec3 &ray_direction) const {
3460 std::vector<std::pair<helios::int3, float>> traversed_voxels;
3461
3462 // Grid bounds
3463 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3464 vec3 grid_max = voxel_grid_center + 0.5f * voxel_grid_size;
3465 vec3 voxel_size = make_vec3(voxel_grid_size.x / static_cast<float>(voxel_grid_divisions.x), voxel_grid_size.y / static_cast<float>(voxel_grid_divisions.y), voxel_grid_size.z / static_cast<float>(voxel_grid_divisions.z));
3466
3467 // Test if ray intersects grid at all
3468 float t_grid_min, t_grid_max;
3469 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max)) {
3470 return traversed_voxels; // Empty - ray doesn't hit grid
3471 }
3472
3473 // Ensure intersection is in forward direction
3474 if (t_grid_max <= 1e-6) {
3475 return traversed_voxels; // Grid is behind ray
3476 }
3477
3478 // Clamp t_grid_min to 0 if ray starts inside grid
3479 t_grid_min = std::max(0.0f, t_grid_min);
3480
3481 // Fast path for single voxel grids
3482 if (voxel_grid_divisions.x == 1 && voxel_grid_divisions.y == 1 && voxel_grid_divisions.z == 1) {
3483 float path_length = t_grid_max - t_grid_min;
3484 if (path_length > 1e-6f) {
3485 traversed_voxels.emplace_back(helios::make_int3(0, 0, 0), path_length);
3486 }
3487 return traversed_voxels;
3488 }
3489
3490 // Starting position in grid space
3491 vec3 start_pos = ray_origin + t_grid_min * ray_direction;
3492
3493 // Convert to voxel indices (clamped to grid bounds)
3494 helios::int3 current_voxel;
3495 current_voxel.x = static_cast<int>(std::floor((start_pos.x - grid_min.x) / voxel_size.x));
3496 current_voxel.y = static_cast<int>(std::floor((start_pos.y - grid_min.y) / voxel_size.y));
3497 current_voxel.z = static_cast<int>(std::floor((start_pos.z - grid_min.z) / voxel_size.z));
3498
3499 // Clamp to grid bounds
3500 current_voxel.x = std::max(0, std::min(current_voxel.x, voxel_grid_divisions.x - 1));
3501 current_voxel.y = std::max(0, std::min(current_voxel.y, voxel_grid_divisions.y - 1));
3502 current_voxel.z = std::max(0, std::min(current_voxel.z, voxel_grid_divisions.z - 1));
3503
3504 // DDA algorithm parameters
3505 helios::int3 step;
3506 vec3 t_delta, t_max;
3507
3508 // Set up stepping direction and delta t values
3509 for (int i = 0; i < 3; i++) {
3510 float dir_comp = (i == 0) ? ray_direction.x : (i == 1) ? ray_direction.y : ray_direction.z;
3511 float size_comp = (i == 0) ? voxel_size.x : (i == 1) ? voxel_size.y : voxel_size.z;
3512 float grid_min_comp = (i == 0) ? grid_min.x : (i == 1) ? grid_min.y : grid_min.z;
3513 float start_comp = (i == 0) ? start_pos.x : (i == 1) ? start_pos.y : start_pos.z;
3514 int current_comp = (i == 0) ? current_voxel.x : (i == 1) ? current_voxel.y : current_voxel.z;
3515 int max_comp = (i == 0) ? voxel_grid_divisions.x : (i == 1) ? voxel_grid_divisions.y : voxel_grid_divisions.z;
3516
3517 if (std::abs(dir_comp) < 1e-8f) {
3518 // Ray is parallel to this axis
3519 if (i == 0) {
3520 step.x = 0;
3521 t_delta.x = 1e30f; // Large value
3522 t_max.x = 1e30f;
3523 } else if (i == 1) {
3524 step.y = 0;
3525 t_delta.y = 1e30f;
3526 t_max.y = 1e30f;
3527 } else {
3528 step.z = 0;
3529 t_delta.z = 1e30f;
3530 t_max.z = 1e30f;
3531 }
3532 } else {
3533 // Calculate step direction and delta t
3534 if (i == 0) {
3535 step.x = (dir_comp > 0) ? 1 : -1;
3536 t_delta.x = std::abs(size_comp / dir_comp);
3537
3538 if (step.x > 0) {
3539 t_max.x = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3540 } else {
3541 t_max.x = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3542 }
3543 } else if (i == 1) {
3544 step.y = (dir_comp > 0) ? 1 : -1;
3545 t_delta.y = std::abs(size_comp / dir_comp);
3546
3547 if (step.y > 0) {
3548 t_max.y = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3549 } else {
3550 t_max.y = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3551 }
3552 } else {
3553 step.z = (dir_comp > 0) ? 1 : -1;
3554 t_delta.z = std::abs(size_comp / dir_comp);
3555
3556 if (step.z > 0) {
3557 t_max.z = t_grid_min + (grid_min_comp + (current_comp + 1) * size_comp - start_comp) / dir_comp;
3558 } else {
3559 t_max.z = t_grid_min + (grid_min_comp + current_comp * size_comp - start_comp) / dir_comp;
3560 }
3561 }
3562 }
3563 }
3564
3565 // Traverse the grid
3566 float current_t = t_grid_min;
3567
3568 while (validateVoxelIndices(current_voxel) && current_t < t_grid_max) {
3569 // Calculate path length through this voxel
3570 float next_t = std::min({t_max.x, t_max.y, t_max.z, t_grid_max});
3571 float path_length = next_t - current_t;
3572
3573 if (path_length > 1e-6f) {
3574 traversed_voxels.emplace_back(current_voxel, path_length);
3575 }
3576
3577 // Move to next voxel
3578 if (next_t >= t_grid_max) {
3579 break; // Reached end of grid
3580 }
3581
3582 // Determine which axis to step along
3583 if (t_max.x <= t_max.y && t_max.x <= t_max.z) {
3584 current_voxel.x += step.x;
3585 t_max.x += t_delta.x;
3586 } else if (t_max.y <= t_max.z) {
3587 current_voxel.y += step.y;
3588 t_max.y += t_delta.y;
3589 } else {
3590 current_voxel.z += step.z;
3591 t_max.z += t_delta.z;
3592 }
3593
3594 current_t = next_t;
3595 }
3596
3597 return traversed_voxels;
3598}
3599
3600void CollisionDetection::calculateVoxelRayPathLengths_CPU(const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3601
3602
3603 auto start_time = std::chrono::high_resolution_clock::now();
3604
3605 // Performance profiling variables
3606 std::atomic<long long> total_raycast_time(0);
3607 std::atomic<int> raycast_count(0);
3608
3609 // NOTE: BVH currency should be ensured by caller before calling this function
3610 // to avoid rebuilding BVH on every batch of rays
3611
3612 const int num_rays = static_cast<int>(ray_origins.size());
3613
3614 // PERFORMANCE OPTIMIZATION: Use thread-local storage to eliminate atomic operations
3615 const int total_voxels = voxel_grid_divisions.x * voxel_grid_divisions.y * voxel_grid_divisions.z;
3616
3617 // Pre-compute grid bounds for early culling
3618 vec3 grid_min = voxel_grid_center - 0.5f * voxel_grid_size;
3619 vec3 grid_max = voxel_grid_center + 0.5f * voxel_grid_size;
3620
3621#ifdef _OPENMP
3622 const int num_threads = omp_get_max_threads();
3623
3624 // Thread-local accumulation arrays to eliminate atomic operations
3625 std::vector<std::vector<int>> thread_ray_counts(num_threads, std::vector<int>(total_voxels, 0));
3626 std::vector<std::vector<float>> thread_path_lengths(num_threads, std::vector<float>(total_voxels, 0.0f));
3627 std::vector<std::vector<int>> thread_hit_before(num_threads, std::vector<int>(total_voxels, 0));
3628 std::vector<std::vector<int>> thread_hit_after(num_threads, std::vector<int>(total_voxels, 0));
3629 std::vector<std::vector<int>> thread_hit_inside(num_threads, std::vector<int>(total_voxels, 0));
3630 std::vector<std::vector<int>> thread_transmitted(num_threads, std::vector<int>(total_voxels, 0));
3631 std::vector<std::vector<std::vector<float>>> thread_individual_paths(num_threads, std::vector<std::vector<float>>(total_voxels));
3632
3633// Use OpenMP for parallel processing with thread-local accumulation
3634#pragma omp parallel for schedule(dynamic)
3635 for (int ray_idx = 0; ray_idx < num_rays; ray_idx++) {
3636 const int thread_id = omp_get_thread_num();
3637 const vec3 &ray_origin = ray_origins[ray_idx];
3638 const vec3 &ray_direction = ray_directions[ray_idx];
3639
3640 // Early culling: Skip rays that don't intersect the grid at all
3641 float t_grid_min, t_grid_max;
3642 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max) || t_grid_max <= 1e-6) {
3643 continue; // Skip this ray entirely
3644 }
3645
3646 // Use DDA traversal to get only intersected voxels
3647 auto traversed_voxels = traverseVoxelGrid(ray_origin, ray_direction);
3648
3649 // Only perform expensive ray classification for rays that actually intersect the grid
3650 if (traversed_voxels.empty()) {
3651 continue; // Skip rays that don't traverse any voxels
3652 }
3653
3654 // Perform ray classification once per ray (outside voxel loop for efficiency)
3655 auto raycast_start = std::chrono::high_resolution_clock::now();
3656 RayQuery query(ray_origin, ray_direction, -1.0f, {});
3657 HitResult hit = castRay(query);
3658 auto raycast_end = std::chrono::high_resolution_clock::now();
3659
3660 // Profile raycast performance
3661 total_raycast_time += std::chrono::duration_cast<std::chrono::microseconds>(raycast_end - raycast_start).count();
3662 raycast_count++;
3663
3664 float hit_distance = hit.hit ? hit.distance : 1e30f; // Large value if no hit
3665
3666 // Process each intersected voxel
3667 for (const auto &voxel_data: traversed_voxels) {
3668 const helios::int3 &voxel_idx = voxel_data.first;
3669 float path_length = voxel_data.second;
3670
3671 // Calculate voxel bounds for ray classification
3672 vec3 voxel_min, voxel_max;
3673 calculateVoxelAABB(voxel_idx, voxel_min, voxel_max);
3674
3675 // Get t_min and t_max for this voxel
3676 float t_min, t_max;
3677 rayAABBIntersect(ray_origin, ray_direction, voxel_min, voxel_max, t_min, t_max);
3678
3679 // Ensure valid intersection
3680 if (t_min < 0)
3681 t_min = 0;
3682
3683 // Perform ray classification for Beer's law calculations
3684 bool hit_before = false;
3685 bool hit_after = false;
3686 bool hit_inside = false;
3687
3688 if (hit.hit) {
3689 // Classify the hit based on where it occurs relative to voxel
3690 if (hit_distance < t_min) {
3691 // Hit occurs before entering voxel
3692 hit_before = true;
3693 } else if (hit_distance >= t_min && hit_distance <= t_max) {
3694 // Hit occurs inside voxel
3695 hit_inside = true;
3696 hit_after = true; // Also count as hit_after since it's after entering
3697 } else {
3698 // Hit occurs after exiting voxel
3699 hit_after = true;
3700 }
3701 } else {
3702 // No geometry hit - ray is transmitted through entire scene
3703 hit_after = true; // Consider this as reaching the voxel
3704 }
3705
3706 // PERFORMANCE OPTIMIZATION: Use thread-local accumulation (no synchronization!)
3707 size_t flat_idx = flatIndex(voxel_idx);
3708
3709 // All operations are now thread-local - no atomic operations needed!
3710 thread_ray_counts[thread_id][flat_idx]++;
3711 thread_path_lengths[thread_id][flat_idx] += path_length;
3712
3713 if (hit_before) {
3714 thread_hit_before[thread_id][flat_idx]++;
3715 }
3716 if (hit_after) {
3717 thread_hit_after[thread_id][flat_idx]++;
3718 }
3719 if (hit_inside) {
3720 thread_hit_inside[thread_id][flat_idx]++;
3721 } else {
3722 thread_transmitted[thread_id][flat_idx]++;
3723 }
3724
3725 // Store individual path lengths in thread-local storage (no critical section!)
3726 thread_individual_paths[thread_id][flat_idx].push_back(path_length);
3727 }
3728 }
3729
3730 // PERFORMANCE OPTIMIZATION: Reduction phase - combine thread-local results
3731 // This single reduction eliminates hundreds of thousands of atomic operations!
3732 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3733 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3734 voxel_ray_counts_flat[voxel_idx] += thread_ray_counts[thread_id][voxel_idx];
3735 voxel_path_lengths_flat[voxel_idx] += thread_path_lengths[thread_id][voxel_idx];
3736 voxel_hit_before_flat[voxel_idx] += thread_hit_before[thread_id][voxel_idx];
3737 voxel_hit_after_flat[voxel_idx] += thread_hit_after[thread_id][voxel_idx];
3738 voxel_hit_inside_flat[voxel_idx] += thread_hit_inside[thread_id][voxel_idx];
3739 voxel_transmitted_flat[voxel_idx] += thread_transmitted[thread_id][voxel_idx];
3740 }
3741 }
3742
3743 // Post-process for flat array storage - aggregate individual paths from thread-local storage
3744 if (use_flat_arrays) {
3745 // Consolidate individual path lengths into flat storage with proper offsets
3746 voxel_individual_path_lengths_flat.clear();
3747
3748 // Calculate total size needed from all threads
3749 size_t total_paths = 0;
3750 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3751 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3752 total_paths += thread_individual_paths[thread_id][voxel_idx].size();
3753 }
3754 }
3755 voxel_individual_path_lengths_flat.reserve(total_paths);
3756
3757 // Build flat array and offsets by aggregating from all threads
3758 size_t current_offset = 0;
3759 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3760 voxel_individual_path_offsets[voxel_idx] = current_offset;
3761 size_t voxel_path_count = 0;
3762
3763 // Aggregate paths from all threads for this voxel
3764 for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
3765 for (float path_length: thread_individual_paths[thread_id][voxel_idx]) {
3766 voxel_individual_path_lengths_flat.push_back(path_length);
3767 voxel_path_count++;
3768 }
3769 }
3770
3771 voxel_individual_path_counts[voxel_idx] = voxel_path_count;
3772 current_offset += voxel_path_count;
3773 }
3774 }
3775#else
3776 // Serial fallback when OpenMP is not available
3777 const int num_threads = 1;
3778 const int thread_id = 0;
3779
3780 // Direct accumulation arrays for serial processing
3781 std::vector<int> serial_ray_counts(total_voxels, 0);
3782 std::vector<float> serial_path_lengths(total_voxels, 0.0f);
3783 std::vector<int> serial_hit_before(total_voxels, 0);
3784 std::vector<int> serial_hit_after(total_voxels, 0);
3785 std::vector<int> serial_hit_inside(total_voxels, 0);
3786 std::vector<int> serial_transmitted(total_voxels, 0);
3787 std::vector<std::vector<float>> serial_individual_paths(total_voxels);
3788
3789 // Serial processing without OpenMP pragmas
3790 for (int ray_idx = 0; ray_idx < num_rays; ray_idx++) {
3791 const vec3 &ray_origin = ray_origins[ray_idx];
3792 const vec3 &ray_direction = ray_directions[ray_idx];
3793
3794 // Early culling: Skip rays that don't intersect the grid at all
3795 float t_grid_min, t_grid_max;
3796 if (!rayAABBIntersect(ray_origin, ray_direction, grid_min, grid_max, t_grid_min, t_grid_max) || t_grid_max <= 1e-6) {
3797 continue; // Skip this ray entirely
3798 }
3799
3800 // Use DDA traversal to get only intersected voxels
3801 auto traversed_voxels = traverseVoxelGrid(ray_origin, ray_direction);
3802
3803 // Only perform expensive ray classification for rays that actually intersect the grid
3804 if (traversed_voxels.empty()) {
3805 continue; // Skip rays that don't traverse any voxels
3806 }
3807
3808 // Perform ray classification once per ray (outside voxel loop for efficiency)
3809 auto raycast_start = std::chrono::high_resolution_clock::now();
3810 RayQuery query(ray_origin, ray_direction, -1.0f, {});
3811 HitResult hit = castRay(query);
3812 auto raycast_end = std::chrono::high_resolution_clock::now();
3813
3814 // Profile raycast performance
3815 total_raycast_time += std::chrono::duration_cast<std::chrono::microseconds>(raycast_end - raycast_start).count();
3816 raycast_count++;
3817
3818 float hit_distance = hit.hit ? hit.distance : 1e30f; // Large value if no hit
3819
3820 // Process each intersected voxel
3821 for (const auto &voxel_data: traversed_voxels) {
3822 const helios::int3 &voxel_idx = voxel_data.first;
3823 float path_length = voxel_data.second;
3824
3825 // Calculate voxel bounds for ray classification
3826 vec3 voxel_min, voxel_max;
3827 calculateVoxelAABB(voxel_idx, voxel_min, voxel_max);
3828
3829 // Get t_min and t_max for this voxel
3830 float t_min, t_max;
3831 rayAABBIntersect(ray_origin, ray_direction, voxel_min, voxel_max, t_min, t_max);
3832
3833 // Ensure valid intersection
3834 if (t_min < 0)
3835 t_min = 0;
3836
3837 // Perform ray classification for Beer's law calculations
3838 bool hit_before = false;
3839 bool hit_after = false;
3840 bool hit_inside = false;
3841
3842 if (hit.hit) {
3843 // Classify the hit based on where it occurs relative to voxel
3844 if (hit_distance < t_min) {
3845 // Hit occurs before entering voxel
3846 hit_before = true;
3847 } else if (hit_distance >= t_min && hit_distance <= t_max) {
3848 // Hit occurs inside voxel
3849 hit_inside = true;
3850 hit_after = true; // Also count as hit_after since it's after entering
3851 } else {
3852 // Hit occurs after exiting voxel
3853 hit_after = true;
3854 }
3855 } else {
3856 // No geometry hit - ray is transmitted through entire scene
3857 hit_after = true; // Consider this as reaching the voxel
3858 }
3859
3860 // Direct accumulation for serial processing
3861 size_t flat_idx = flatIndex(voxel_idx);
3862
3863 serial_ray_counts[flat_idx]++;
3864 serial_path_lengths[flat_idx] += path_length;
3865
3866 if (hit_before) {
3867 serial_hit_before[flat_idx]++;
3868 }
3869 if (hit_after) {
3870 serial_hit_after[flat_idx]++;
3871 }
3872 if (hit_inside) {
3873 serial_hit_inside[flat_idx]++;
3874 } else {
3875 serial_transmitted[flat_idx]++;
3876 }
3877
3878 // Store individual path lengths for serial processing
3879 serial_individual_paths[flat_idx].push_back(path_length);
3880 }
3881 }
3882
3883 // Direct assignment from serial arrays to final storage
3884 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3885 voxel_ray_counts_flat[voxel_idx] += serial_ray_counts[voxel_idx];
3886 voxel_path_lengths_flat[voxel_idx] += serial_path_lengths[voxel_idx];
3887 voxel_hit_before_flat[voxel_idx] += serial_hit_before[voxel_idx];
3888 voxel_hit_after_flat[voxel_idx] += serial_hit_after[voxel_idx];
3889 voxel_hit_inside_flat[voxel_idx] += serial_hit_inside[voxel_idx];
3890 voxel_transmitted_flat[voxel_idx] += serial_transmitted[voxel_idx];
3891 }
3892
3893 // Post-process for flat array storage - aggregate individual paths from serial storage
3894 if (use_flat_arrays) {
3895 // Consolidate individual path lengths into flat storage with proper offsets
3896 voxel_individual_path_lengths_flat.clear();
3897
3898 // Calculate total size needed
3899 size_t total_paths = 0;
3900 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3901 total_paths += serial_individual_paths[voxel_idx].size();
3902 }
3903 voxel_individual_path_lengths_flat.reserve(total_paths);
3904
3905 // Build flat array and offsets
3906 size_t current_offset = 0;
3907 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
3908 voxel_individual_path_offsets[voxel_idx] = current_offset;
3909 size_t voxel_path_count = serial_individual_paths[voxel_idx].size();
3910
3911 // Copy paths to flat array
3912 for (float path_length: serial_individual_paths[voxel_idx]) {
3913 voxel_individual_path_lengths_flat.push_back(path_length);
3914 }
3915
3916 voxel_individual_path_counts[voxel_idx] = voxel_path_count;
3917 current_offset += voxel_path_count;
3918 }
3919 }
3920#endif
3921
3922 auto end_time = std::chrono::high_resolution_clock::now();
3923 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
3924
3925 // Performance profiling output
3926 long long avg_raycast_time = raycast_count > 0 ? total_raycast_time.load() / raycast_count.load() : 0;
3927 //
3928 // if (printmessages) {
3929 // // Report some statistics
3930 // int total_ray_voxel_intersections = 0;
3931 // if (use_flat_arrays) {
3932 // // Sum from flat arrays
3933 // for (size_t i = 0; i < voxel_ray_counts_flat.size(); i++) {
3934 // total_ray_voxel_intersections += voxel_ray_counts_flat[i];
3935 // }
3936 // } else {
3937 // // Sum from nested vectors
3938 // for (int i = 0; i < voxel_grid_divisions.x; i++) {
3939 // for (int j = 0; j < voxel_grid_divisions.y; j++) {
3940 // for (int k = 0; k < voxel_grid_divisions.z; k++) {
3941 // total_ray_voxel_intersections += voxel_ray_counts[i][j][k];
3942 // }
3943 // }
3944 // }
3945 // }
3946 // }
3947}
3948
3949#ifdef HELIOS_CUDA_AVAILABLE
3950bool CollisionDetection::calculateVoxelRayPathLengths_GPU(const std::vector<vec3> &ray_origins, const std::vector<vec3> &ray_directions) {
3951 // Check if GPU is actually available at runtime
3952 int deviceCount = 0;
3953 cudaError_t err = cudaGetDeviceCount(&deviceCount);
3954 if (err != cudaSuccess || deviceCount == 0) {
3955 // No GPU hardware available - return false to trigger CPU fallback
3956 return false;
3957 }
3958
3959 if (printmessages) {
3960 }
3961
3962 auto start_time = std::chrono::high_resolution_clock::now();
3963
3964 const int num_rays = static_cast<int>(ray_origins.size());
3965 const int total_voxels = voxel_grid_divisions.x * voxel_grid_divisions.y * voxel_grid_divisions.z;
3966
3967 // Prepare data for GPU kernel
3968 std::vector<float> h_ray_origins(num_rays * 3);
3969 std::vector<float> h_ray_directions(num_rays * 3);
3970 std::vector<int> h_voxel_ray_counts(total_voxels, 0);
3971 std::vector<float> h_voxel_path_lengths(total_voxels, 0.0f);
3972 std::vector<int> h_voxel_transmitted(total_voxels, 0);
3973 std::vector<int> h_voxel_hit_before(total_voxels, 0);
3974 std::vector<int> h_voxel_hit_after(total_voxels, 0);
3975 std::vector<int> h_voxel_hit_inside(total_voxels, 0);
3976
3977 // Convert ray data to flat arrays
3978 for (int i = 0; i < num_rays; i++) {
3979 h_ray_origins[i * 3 + 0] = ray_origins[i].x;
3980 h_ray_origins[i * 3 + 1] = ray_origins[i].y;
3981 h_ray_origins[i * 3 + 2] = ray_origins[i].z;
3982
3983 h_ray_directions[i * 3 + 0] = ray_directions[i].x;
3984 h_ray_directions[i * 3 + 1] = ray_directions[i].y;
3985 h_ray_directions[i * 3 + 2] = ray_directions[i].z;
3986 }
3987
3988 // Check if there are any primitives in the scene for geometry detection
3989 int primitive_count = static_cast<int>(primitive_cache.size());
3990
3991 // Launch CUDA kernel
3992 bool gpu_success = launchVoxelRayPathLengths(num_rays, h_ray_origins.data(), h_ray_directions.data(), voxel_grid_center.x, voxel_grid_center.y, voxel_grid_center.z, voxel_grid_size.x, voxel_grid_size.y, voxel_grid_size.z, voxel_grid_divisions.x,
3993 voxel_grid_divisions.y, voxel_grid_divisions.z, primitive_count, h_voxel_ray_counts.data(), h_voxel_path_lengths.data(), h_voxel_transmitted.data(), h_voxel_hit_before.data(), h_voxel_hit_after.data(),
3994 h_voxel_hit_inside.data());
3995
3996 if (!gpu_success) {
3997 // GPU kernel failed - return false to trigger CPU fallback
3998 return false;
3999 }
4000
4001 // Copy results back to class data structures
4002 if (use_flat_arrays) {
4003 // Copy directly to flat arrays
4004 voxel_ray_counts_flat = h_voxel_ray_counts;
4005 voxel_path_lengths_flat = h_voxel_path_lengths;
4006 voxel_transmitted_flat = h_voxel_transmitted;
4007
4008 // Copy hit classification data from GPU kernel
4009 voxel_hit_before_flat = h_voxel_hit_before;
4010 voxel_hit_after_flat = h_voxel_hit_after;
4011 voxel_hit_inside_flat = h_voxel_hit_inside;
4012
4013 // Initialize individual path length data structures
4014 // Note: GPU implementation currently provides aggregate data only
4015 // For now, initialize empty individual path data to prevent crashes
4016 voxel_individual_path_lengths_flat.clear();
4017 std::fill(voxel_individual_path_offsets.begin(), voxel_individual_path_offsets.end(), 0);
4018 std::fill(voxel_individual_path_counts.begin(), voxel_individual_path_counts.end(), 0);
4019
4020 // TODO: Implement proper individual path length collection in GPU kernel
4021 // For now, estimate individual path lengths based on ray geometry
4022 // This is a workaround until the GPU kernel can provide individual path data
4023 for (int voxel_idx = 0; voxel_idx < total_voxels; ++voxel_idx) {
4024 voxel_individual_path_offsets[voxel_idx] = voxel_individual_path_lengths_flat.size();
4025
4026 if (h_voxel_ray_counts[voxel_idx] > 0) {
4027 // Calculate individual path lengths for each ray by simulating ray-voxel intersection
4028 // This is an approximation but more accurate than using just averages
4029 std::vector<float> estimated_paths;
4030
4031 // Convert flat voxel index back to 3D coordinates
4032 int voxel_z = voxel_idx % voxel_grid_divisions.z;
4033 int voxel_y = (voxel_idx / voxel_grid_divisions.z) % voxel_grid_divisions.y;
4034 int voxel_x = voxel_idx / (voxel_grid_divisions.y * voxel_grid_divisions.z);
4035
4036 // Calculate voxel bounds
4037 vec3 voxel_size = voxel_grid_size;
4038 voxel_size.x /= voxel_grid_divisions.x;
4039 voxel_size.y /= voxel_grid_divisions.y;
4040 voxel_size.z /= voxel_grid_divisions.z;
4041
4042 vec3 voxel_min = voxel_grid_center - voxel_grid_size * 0.5f;
4043 voxel_min.x += voxel_x * voxel_size.x;
4044 voxel_min.y += voxel_y * voxel_size.y;
4045 voxel_min.z += voxel_z * voxel_size.z;
4046
4047 vec3 voxel_max = voxel_min + voxel_size;
4048
4049 // Check each ray to see if it intersects this voxel and calculate path length
4050 for (int ray_idx = 0; ray_idx < num_rays; ++ray_idx) {
4051 vec3 ray_origin = ray_origins[ray_idx];
4052 vec3 ray_dir = ray_directions[ray_idx];
4053
4054 // Ray-box intersection algorithm
4055 float t_min = 0.0f;
4056 float t_max = std::numeric_limits<float>::max();
4057
4058 // Check intersection with each axis-aligned slab
4059 for (int axis = 0; axis < 3; ++axis) {
4060 float origin_comp = (axis == 0) ? ray_origin.x : (axis == 1) ? ray_origin.y : ray_origin.z;
4061 float dir_comp = (axis == 0) ? ray_dir.x : (axis == 1) ? ray_dir.y : ray_dir.z;
4062 float min_comp = (axis == 0) ? voxel_min.x : (axis == 1) ? voxel_min.y : voxel_min.z;
4063 float max_comp = (axis == 0) ? voxel_max.x : (axis == 1) ? voxel_max.y : voxel_max.z;
4064
4065 if (std::abs(dir_comp) < 1e-9f) {
4066 // Ray is parallel to slab
4067 if (origin_comp < min_comp || origin_comp > max_comp) {
4068 t_max = -1.0f; // No intersection
4069 break;
4070 }
4071 } else {
4072 float t1 = (min_comp - origin_comp) / dir_comp;
4073 float t2 = (max_comp - origin_comp) / dir_comp;
4074
4075 if (t1 > t2)
4076 std::swap(t1, t2);
4077
4078 t_min = std::max(t_min, t1);
4079 t_max = std::min(t_max, t2);
4080
4081 if (t_min > t_max)
4082 break; // No intersection
4083 }
4084 }
4085
4086 // If there's a valid intersection, calculate path length
4087 if (t_max > t_min && t_max > 0.0f) {
4088 float entry_t = std::max(0.0f, t_min);
4089 float exit_t = t_max;
4090 float path_length = exit_t - entry_t;
4091
4092 if (path_length > 1e-6f) {
4093 estimated_paths.push_back(path_length);
4094 }
4095 }
4096 }
4097
4098 // Store the estimated paths
4099 for (float path_length: estimated_paths) {
4100 voxel_individual_path_lengths_flat.push_back(path_length);
4101 }
4102 voxel_individual_path_counts[voxel_idx] = estimated_paths.size();
4103 } else {
4104 voxel_individual_path_counts[voxel_idx] = 0;
4105 }
4106 }
4107
4108 } else {
4109 // Copy to nested vectors
4110 for (int i = 0; i < voxel_grid_divisions.x; i++) {
4111 for (int j = 0; j < voxel_grid_divisions.y; j++) {
4112 for (int k = 0; k < voxel_grid_divisions.z; k++) {
4113 int flat_idx = i * voxel_grid_divisions.y * voxel_grid_divisions.z + j * voxel_grid_divisions.z + k;
4114 voxel_ray_counts[i][j][k] = h_voxel_ray_counts[flat_idx];
4115 voxel_path_lengths[i][j][k] = h_voxel_path_lengths[flat_idx];
4116 voxel_transmitted[i][j][k] = h_voxel_transmitted[flat_idx];
4117
4118 // Copy hit classification data from GPU kernel
4119 voxel_hit_before[i][j][k] = h_voxel_hit_before[flat_idx];
4120 voxel_hit_after[i][j][k] = h_voxel_hit_after[flat_idx];
4121 voxel_hit_inside[i][j][k] = h_voxel_hit_inside[flat_idx];
4122
4123 // Initialize individual path lengths (approximation)
4124 voxel_individual_path_lengths[i][j][k].clear();
4125 if (h_voxel_ray_counts[flat_idx] > 0) {
4126 float avg_path_length = h_voxel_path_lengths[flat_idx] / h_voxel_ray_counts[flat_idx];
4127 for (int ray = 0; ray < h_voxel_ray_counts[flat_idx]; ++ray) {
4128 voxel_individual_path_lengths[i][j][k].push_back(avg_path_length);
4129 }
4130 }
4131 }
4132 }
4133 }
4134 }
4135
4136 auto end_time = std::chrono::high_resolution_clock::now();
4137 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
4138
4139 if (printmessages) {
4140
4141 // Report some statistics
4142 int total_ray_voxel_intersections = 0;
4143 for (const auto &count: h_voxel_ray_counts) {
4144 total_ray_voxel_intersections += count;
4145 }
4146 }
4147
4148 return true; // GPU execution succeeded
4149}
4150#endif
4151
4152void CollisionDetection::ensureOptimizedBVH() {
4153 // Convert standard BVH to Structure-of-Arrays format for optimal cache performance
4154 // Only rebuild if BVH structure has actually changed OR SoA is explicitly dirty
4155 if (soa_dirty || bvh_nodes_soa.node_count != bvh_nodes.size() || bvh_nodes_soa.aabb_mins.empty()) {
4156
4157 if (bvh_nodes.empty()) {
4158 bvh_nodes_soa.clear();
4159 bvh_nodes_soa.node_count = 0;
4160 return;
4161 }
4162
4163 size_t node_count = bvh_nodes.size();
4164
4165 // OPTIMIZATION 1: Pre-allocate all arrays to exact size (no reserve+push_back)
4166 bvh_nodes_soa.node_count = node_count;
4167 bvh_nodes_soa.aabb_mins.resize(node_count);
4168 bvh_nodes_soa.aabb_maxs.resize(node_count);
4169 bvh_nodes_soa.left_children.resize(node_count);
4170 bvh_nodes_soa.right_children.resize(node_count);
4171 bvh_nodes_soa.primitive_starts.resize(node_count);
4172 bvh_nodes_soa.primitive_counts.resize(node_count);
4173 bvh_nodes_soa.is_leaf_flags.resize(node_count);
4174
4175 // OPTIMIZATION 2: Direct assignment instead of push_back (much faster)
4176 for (size_t i = 0; i < node_count; ++i) {
4177 const BVHNode &node = bvh_nodes[i];
4178
4179 // Hot data: frequently accessed during traversal
4180 bvh_nodes_soa.aabb_mins[i] = node.aabb_min;
4181 bvh_nodes_soa.aabb_maxs[i] = node.aabb_max;
4182 bvh_nodes_soa.left_children[i] = node.left_child;
4183 bvh_nodes_soa.right_children[i] = node.right_child;
4184
4185 // Cold data: accessed less frequently
4186 bvh_nodes_soa.primitive_starts[i] = node.primitive_start;
4187 bvh_nodes_soa.primitive_counts[i] = node.primitive_count;
4188 bvh_nodes_soa.is_leaf_flags[i] = node.is_leaf ? 1 : 0;
4189 }
4190
4191 // Mark SoA as clean after successful conversion
4192 soa_dirty = false;
4193 }
4194}
4195
4196void CollisionDetection::updatePrimitiveAABBCache(uint uuid) {
4197 // Check if primitive exists
4198 if (!context->doesPrimitiveExist(uuid)) {
4199 return;
4200 }
4201
4202 try {
4203 // Get primitive vertices to compute AABB
4204 std::vector<vec3> vertices = context->getPrimitiveVertices(uuid);
4205 if (vertices.empty()) {
4206 return;
4207 }
4208
4209 // Compute AABB from vertices
4210 vec3 aabb_min = vertices[0];
4211 vec3 aabb_max = vertices[0];
4212
4213 for (const vec3 &vertex: vertices) {
4214 aabb_min.x = std::min(aabb_min.x, vertex.x);
4215 aabb_min.y = std::min(aabb_min.y, vertex.y);
4216 aabb_min.z = std::min(aabb_min.z, vertex.z);
4217
4218 aabb_max.x = std::max(aabb_max.x, vertex.x);
4219 aabb_max.y = std::max(aabb_max.y, vertex.y);
4220 aabb_max.z = std::max(aabb_max.z, vertex.z);
4221 }
4222
4223 // Store in cache
4224 primitive_aabbs_cache[uuid] = std::make_pair(aabb_min, aabb_max);
4225
4226 } catch (const std::exception &e) {
4227 if (printmessages) {
4228 std::cerr << "Warning: Failed to cache AABB for primitive " << uuid << ": " << e.what() << std::endl;
4229 }
4230 }
4231}
4232
4233void CollisionDetection::optimizedRebuildBVH(const std::set<uint> &final_geometry) {
4234 // Convert to vector for buildBVH compatibility
4235 std::vector<uint> final_primitives(final_geometry.begin(), final_geometry.end());
4236
4237 // Ensure all primitive AABBs are cached
4238 for (uint uuid: final_geometry) {
4239 if (primitive_aabbs_cache.find(uuid) == primitive_aabbs_cache.end()) {
4240 updatePrimitiveAABBCache(uuid);
4241 }
4242 }
4243
4244 if (printmessages) {
4245 std::cout << "Optimized rebuild with " << final_primitives.size() << " primitives (using cached AABBs)" << std::endl;
4246 }
4247
4248 // Use regular buildBVH but with pre-cached AABBs for efficiency
4249 buildBVH(final_primitives);
4250
4251 // Update tracking
4252 last_bvh_geometry = final_geometry;
4253 bvh_dirty = false;
4254 soa_dirty = true; // SoA needs rebuild after BVH change
4255}
4256
4257// -------- TREE-BASED BVH IMPLEMENTATION --------
4258
4259void CollisionDetection::enableTreeBasedBVH(float isolation_distance) {
4260 tree_based_bvh_enabled = true;
4261 tree_isolation_distance = isolation_distance;
4262}
4263
4265 tree_based_bvh_enabled = false;
4266 tree_bvh_map.clear();
4267 object_to_tree_map.clear();
4268}
4269
4271 return tree_based_bvh_enabled;
4272}
4273
4275 if (static_obstacle_primitives.empty() || obstacle_spatial_grid_initialized) {
4276 return;
4277 }
4278
4279 // Set cell size to be roughly the static obstacle distance for optimal performance
4280 obstacle_spatial_grid.cell_size = 20.0f; // Slightly larger than MAX_STATIC_OBSTACLE_DISTANCE
4281 obstacle_spatial_grid.grid_cells.clear();
4282
4283 // Insert each static obstacle into the spatial grid
4284 for (uint obstacle_prim: static_obstacle_primitives) {
4285 if (context->doesPrimitiveExist(obstacle_prim)) {
4286 helios::vec3 min_corner, max_corner;
4287 context->getPrimitiveBoundingBox(obstacle_prim, min_corner, max_corner);
4288 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4289
4290 int64_t grid_key = obstacle_spatial_grid.getGridKey(prim_center.x, prim_center.y);
4291 obstacle_spatial_grid.grid_cells[grid_key].push_back(obstacle_prim);
4292 }
4293 }
4294
4295 obstacle_spatial_grid_initialized = true;
4296}
4297
4298std::vector<uint> CollisionDetection::ObstacleSpatialGrid::getRelevantObstacles(const helios::vec3 &position, float radius) const {
4299 std::vector<uint> relevant_obstacles;
4300
4301 // Calculate the range of grid cells to search
4302 int32_t min_grid_x = static_cast<int32_t>(std::floor((position.x - radius) / cell_size));
4303 int32_t max_grid_x = static_cast<int32_t>(std::floor((position.x + radius) / cell_size));
4304 int32_t min_grid_y = static_cast<int32_t>(std::floor((position.y - radius) / cell_size));
4305 int32_t max_grid_y = static_cast<int32_t>(std::floor((position.y + radius) / cell_size));
4306
4307 // Search all relevant grid cells
4308 for (int32_t grid_x = min_grid_x; grid_x <= max_grid_x; ++grid_x) {
4309 for (int32_t grid_y = min_grid_y; grid_y <= max_grid_y; ++grid_y) {
4310 int64_t grid_key = (static_cast<int64_t>(grid_x) << 32) | static_cast<uint32_t>(grid_y);
4311
4312 auto cell_it = grid_cells.find(grid_key);
4313 if (cell_it != grid_cells.end()) {
4314 // Add all obstacles from this cell (could add distance filtering here if needed)
4315 relevant_obstacles.insert(relevant_obstacles.end(), cell_it->second.begin(), cell_it->second.end());
4316 }
4317 }
4318 }
4319
4320 return relevant_obstacles;
4321}
4322
4323void CollisionDetection::registerTree(uint tree_object_id, const std::vector<uint> &tree_primitives) {
4324 if (!tree_based_bvh_enabled) {
4325 if (printmessages) {
4326 std::cout << "WARNING: Tree registration ignored - tree-based BVH not enabled" << std::endl;
4327 }
4328 return;
4329 }
4330
4331 // Calculate tree spatial bounds
4332 vec3 tree_center(0, 0, 0);
4333 vec3 aabb_min(1e30f, 1e30f, 1e30f);
4334 vec3 aabb_max(-1e30f, -1e30f, -1e30f);
4335
4336 for (uint prim_uuid: tree_primitives) {
4337 if (context->doesPrimitiveExist(prim_uuid)) {
4338 vec3 prim_min, prim_max;
4339 context->getPrimitiveBoundingBox(prim_uuid, prim_min, prim_max);
4340
4341 aabb_min.x = std::min(aabb_min.x, prim_min.x);
4342 aabb_min.y = std::min(aabb_min.y, prim_min.y);
4343 aabb_min.z = std::min(aabb_min.z, prim_min.z);
4344
4345 aabb_max.x = std::max(aabb_max.x, prim_max.x);
4346 aabb_max.y = std::max(aabb_max.y, prim_max.y);
4347 aabb_max.z = std::max(aabb_max.z, prim_max.z);
4348 }
4349 }
4350
4351 tree_center = (aabb_min + aabb_max) * 0.5f;
4352 float tree_radius = (aabb_max - aabb_min).magnitude() * 0.5f;
4353
4354 // Create or update tree BVH entry
4355 TreeBVH &tree_bvh = tree_bvh_map[tree_object_id];
4356 tree_bvh.tree_object_id = tree_object_id;
4357 tree_bvh.tree_center = tree_center;
4358 tree_bvh.tree_radius = tree_radius;
4359
4360 // Update object-to-tree mapping
4361 for (uint prim_uuid: tree_primitives) {
4362 object_to_tree_map[prim_uuid] = tree_object_id;
4363 }
4364}
4365
4366void CollisionDetection::setStaticObstacles(const std::vector<uint> &obstacle_primitives) {
4367 static_obstacle_primitives.clear();
4368
4369 // Validate and store static obstacles
4370 for (uint prim_uuid: obstacle_primitives) {
4371 if (context->doesPrimitiveExist(prim_uuid)) {
4372 static_obstacle_primitives.push_back(prim_uuid);
4373 }
4374 }
4375
4376 // Initialize spatial grid for fast obstacle lookup
4377 obstacle_spatial_grid_initialized = false;
4379}
4380
4381std::vector<uint> CollisionDetection::getRelevantGeometryForTree(const helios::vec3 &query_position, const std::vector<uint> &query_primitives, float max_distance) {
4382 std::vector<uint> relevant_geometry;
4383
4384 if (!tree_based_bvh_enabled) {
4385 // If tree-based BVH is disabled, return empty to signal using all geometry
4386 return relevant_geometry;
4387 }
4388
4389 // Use spatial grid for fast static obstacle lookup if available
4390 const float MAX_STATIC_OBSTACLE_DISTANCE = max_distance; // Use collision detection distance for static obstacles
4391
4392 if (obstacle_spatial_grid_initialized && !static_obstacle_primitives.empty()) {
4393 // Fast spatial grid lookup - O(1) instead of O(N)
4394 std::vector<uint> candidate_obstacles = obstacle_spatial_grid.getRelevantObstacles(query_position, MAX_STATIC_OBSTACLE_DISTANCE);
4395
4396
4397 // Still need distance check for precise filtering within grid cells
4398 for (uint static_prim: candidate_obstacles) {
4399 if (context->doesPrimitiveExist(static_prim)) {
4400 helios::vec3 min_corner, max_corner;
4401 context->getPrimitiveBoundingBox(static_prim, min_corner, max_corner);
4402 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4403 float distance = (query_position - prim_center).magnitude();
4404 if (distance < MAX_STATIC_OBSTACLE_DISTANCE) {
4405 relevant_geometry.push_back(static_prim);
4406 }
4407 }
4408 }
4409
4410 } else {
4411 // Fallback to linear search if spatial grid not available
4412 for (uint static_prim: static_obstacle_primitives) {
4413 if (context->doesPrimitiveExist(static_prim)) {
4414 helios::vec3 min_corner, max_corner;
4415 context->getPrimitiveBoundingBox(static_prim, min_corner, max_corner);
4416 helios::vec3 prim_center = (min_corner + max_corner) * 0.5f;
4417 float distance = (query_position - prim_center).magnitude();
4418 if (distance < MAX_STATIC_OBSTACLE_DISTANCE) {
4419 relevant_geometry.push_back(static_prim);
4420 }
4421 }
4422 }
4423 }
4424
4425 // Find which tree this query belongs to
4426 uint source_tree_id = 0;
4427 if (!query_primitives.empty()) {
4428 // Use the first query primitive to identify source tree
4429 auto it = object_to_tree_map.find(query_primitives[0]);
4430 if (it != object_to_tree_map.end()) {
4431 source_tree_id = it->second;
4432 }
4433 }
4434
4435 // If we couldn't identify the source tree, find the closest tree to query position
4436 if (source_tree_id == 0) {
4437 float min_distance = 1e30f;
4438 for (const auto &tree_pair: tree_bvh_map) {
4439 const TreeBVH &tree = tree_pair.second;
4440 float distance = (query_position - tree.tree_center).magnitude();
4441 if (distance < min_distance) {
4442 min_distance = distance;
4443 source_tree_id = tree.tree_object_id;
4444 }
4445 }
4446 }
4447
4448 // Add geometry from source tree and nearby trees within interaction distance
4449 for (const auto &tree_pair: tree_bvh_map) {
4450 const TreeBVH &tree = tree_pair.second;
4451 uint tree_id = tree_pair.first;
4452
4453 if (tree_id == source_tree_id) {
4454 // Always include source tree's own geometry
4455 for (uint prim_uuid: tree.primitive_indices) {
4456 if (context->doesPrimitiveExist(prim_uuid)) {
4457 relevant_geometry.push_back(prim_uuid);
4458 }
4459 }
4460 } else {
4461 // Check if other trees are within interaction distance
4462 float distance = (query_position - tree.tree_center).magnitude();
4463 float interaction_threshold = tree_isolation_distance + tree.tree_radius;
4464
4465 if (distance < interaction_threshold) {
4466 // Include nearby tree's geometry
4467 for (uint prim_uuid: tree.primitive_indices) {
4468 if (context->doesPrimitiveExist(prim_uuid)) {
4469 relevant_geometry.push_back(prim_uuid);
4470 }
4471 }
4472 }
4473 }
4474 }
4475
4476 return relevant_geometry;
4477}