1.3.72
 
Loading...
Searching...
No Matches
VulkanComputeBackend.cpp
Go to the documentation of this file.
1
17#include <fstream>
18#include <iostream>
19#include <cstring>
20
21namespace helios {
22
// NOTE(review): the signature line of this function is missing from this listing.
// The body probes whether a Vulkan device can be brought up: it constructs a
// temporary VulkanDevice, calls initialize(false) on it (the other initialize() call
// site in this file passes the validation-layer flag in that position), shuts it
// down again and returns true.
24 try {
25 VulkanDevice probe_device;
26 probe_device.initialize(false);
27 probe_device.shutdown();
28 return true;
29 } catch (...) {
// Any exception raised while probing is swallowed and reported as "false".
30 return false;
31 }
32 }
33
34 VulkanComputeBackend::VulkanComputeBackend() : device(new VulkanDevice()), owns_device(true) {
35 // Production mode: own the device
36 }
37
38 VulkanComputeBackend::VulkanComputeBackend(VulkanDevice *external_device) : device(external_device), owns_device(false) {
39 // Test mode: borrow pre-initialized device from test singleton
40 if (!device) {
41 helios_runtime_error("ERROR (VulkanComputeBackend): external_device cannot be nullptr");
42 }
43 }
44
45 VulkanComputeBackend::~VulkanComputeBackend() {
46 shutdown();
47 if (owns_device) {
48 delete device;
49 }
50 }
51
// NOTE(review): the signature line of this function is missing from this listing;
// presumably this is the backend's initialization routine — confirm against the header.
// Steps performed by the visible body:
//   1. Decide whether validation layers are requested (build type, then env override).
//   2. Initialize the Vulkan device, but only when this backend owns it.
//   3. Create command resources, descriptor sets and pipelines (in that order).
53 // Determine whether to enable validation layers
54 // Default: disabled for tests (20-30% faster), enabled in debug builds
55 // Can be explicitly controlled via HELIOS_VULKAN_VALIDATION environment variable
56 bool enable_validation = false;
57
58#ifndef NDEBUG
59 // Debug builds: enable validation by default
60 enable_validation = true;
61#endif
62
63 // Allow environment variable override
// Recognized values: "1"/"true"/"TRUE" enable, "0"/"false"/"FALSE" disable.
// Any other value is ignored and the build-type default above stays in effect.
64 const char *validation_env = std::getenv("HELIOS_VULKAN_VALIDATION");
65 if (validation_env != nullptr) {
66 std::string val(validation_env);
67 if (val == "1" || val == "true" || val == "TRUE") {
68 enable_validation = true;
69 } else if (val == "0" || val == "false" || val == "FALSE") {
70 enable_validation = false;
71 }
72 }
73
74 // Initialize Vulkan device (only if we own it - shared device already initialized)
// Consequence: for a borrowed device, enable_validation computed above is not used.
75 if (owns_device) {
76 device->initialize(enable_validation);
77 }
78
79 // Create command resources
80 createCommandResources();
81
82 // Create descriptor sets and pipelines
83 createDescriptorSets();
84 createPipelines();
85 }
86
// NOTE(review): the signature line of this function is missing from this listing;
// the destructor calls shutdown(), and this body is presumably that function — confirm.
// Releases every Vulkan object created by this backend, in this order: pipelines and
// pipeline layout, descriptor pool and set layouts, all buffers, then fences, the
// timestamp query pool and the command pool. The device itself is shut down only when
// this backend owns it.
// NOTE(review): the destroyed handles are not reset to VK_NULL_HANDLE in this body.
// For an owned device the early-return guard below prevents a second pass, provided
// device->shutdown() clears the VkDevice handle (not visible here). For a borrowed
// device the guard stays false, so a second call would destroy the same handles again
// unless they are reset elsewhere — verify.
88 if (!device || device->getDevice() == VK_NULL_HANDLE) {
89 return; // Already shutdown
90 }
91
92 VkDevice vk_device = device->getDevice();
93
94 // Wait for device idle
// Ensures no submitted work is still using the objects destroyed below.
95 vkDeviceWaitIdle(vk_device);
96
97 // Destroy pipelines
98 if (pipeline_direct != VK_NULL_HANDLE)
99 vkDestroyPipeline(vk_device, pipeline_direct, nullptr);
100 if (pipeline_diffuse != VK_NULL_HANDLE)
101 vkDestroyPipeline(vk_device, pipeline_diffuse, nullptr);
102 if (pipeline_camera != VK_NULL_HANDLE)
103 vkDestroyPipeline(vk_device, pipeline_camera, nullptr);
104 if (pipeline_pixel_label != VK_NULL_HANDLE)
105 vkDestroyPipeline(vk_device, pipeline_pixel_label, nullptr);
106 if (pipeline_layout != VK_NULL_HANDLE)
107 vkDestroyPipelineLayout(vk_device, pipeline_layout, nullptr);
108
109 // Destroy descriptor pool (also frees sets)
110 if (descriptor_pool != VK_NULL_HANDLE)
111 vkDestroyDescriptorPool(vk_device, descriptor_pool, nullptr);
112 if (set_layout_geometry != VK_NULL_HANDLE)
113 vkDestroyDescriptorSetLayout(vk_device, set_layout_geometry, nullptr);
114 if (set_layout_materials != VK_NULL_HANDLE)
115 vkDestroyDescriptorSetLayout(vk_device, set_layout_materials, nullptr);
116 if (set_layout_results != VK_NULL_HANDLE)
117 vkDestroyDescriptorSetLayout(vk_device, set_layout_results, nullptr);
118 if (set_layout_sky != VK_NULL_HANDLE)
119 vkDestroyDescriptorSetLayout(vk_device, set_layout_sky, nullptr);
120 if (set_layout_debug != VK_NULL_HANDLE)
121 vkDestroyDescriptorSetLayout(vk_device, set_layout_debug, nullptr);
122
123 // Destroy buffers
// destroyBuffer() is called unconditionally for every buffer; presumably it tolerates
// buffers that were never created — confirm in destroyBuffer().
124 destroyBuffer(bvh_buffer);
125 destroyBuffer(primitive_indices_buffer);
126 destroyBuffer(transform_matrices_buffer);
127 destroyBuffer(primitive_types_buffer);
128 destroyBuffer(primitive_uuids_buffer);
129 destroyBuffer(primitive_positions_buffer);
130 destroyBuffer(object_subdivisions_buffer);
131 destroyBuffer(twosided_flag_buffer);
132 destroyBuffer(patch_vertices_buffer);
133 destroyBuffer(triangle_vertices_buffer);
134 destroyBuffer(normal_buffer);
135 destroyBuffer(mask_data_buffer);
136 destroyBuffer(mask_sizes_buffer);
137 destroyBuffer(mask_offsets_buffer);
138 destroyBuffer(mask_IDs_buffer);
139 destroyBuffer(uv_data_buffer);
140 destroyBuffer(uv_IDs_buffer);
141 destroyBuffer(source_positions_buffer);
142 destroyBuffer(source_types_buffer);
143 destroyBuffer(source_rotations_buffer);
144 destroyBuffer(source_widths_buffer);
145 destroyBuffer(source_fluxes_buffer);
146 destroyBuffer(source_fluxes_cam_buffer);
147 destroyBuffer(reflectivity_buffer);
148 destroyBuffer(transmissivity_buffer);
149 destroyBuffer(specular_exponent_buffer);
150 destroyBuffer(specular_scale_buffer);
151 destroyBuffer(radiation_in_buffer);
152 destroyBuffer(radiation_out_top_buffer);
153 destroyBuffer(radiation_out_bottom_buffer);
154 destroyBuffer(scatter_top_buffer);
155 destroyBuffer(scatter_bottom_buffer);
156 destroyBuffer(camera_radiation_buffer);
157 destroyBuffer(camera_pixel_label_buffer);
158 destroyBuffer(camera_pixel_depth_buffer);
159 destroyBuffer(camera_scatter_top_buffer);
160 destroyBuffer(camera_scatter_bottom_buffer);
161 destroyBuffer(radiation_specular_buffer);
162 destroyBuffer(diffuse_flux_buffer);
163 destroyBuffer(diffuse_peak_dir_buffer);
164 destroyBuffer(diffuse_extinction_buffer);
165 destroyBuffer(diffuse_dist_norm_buffer);
166 destroyBuffer(sky_radiance_params_buffer);
167 destroyBuffer(camera_sky_radiance_buffer);
168 destroyBuffer(solar_disk_radiance_buffer);
169 destroyBuffer(debug_counters_buffer);
170 destroyBuffer(bbox_vertices_buffer);
171 destroyBuffer(band_map_buffer);
172
173 // Destroy command resources
174 if (transfer_fence != VK_NULL_HANDLE)
175 vkDestroyFence(vk_device, transfer_fence, nullptr);
176 if (compute_fence != VK_NULL_HANDLE)
177 vkDestroyFence(vk_device, compute_fence, nullptr);
178 if (timestamp_query_pool != VK_NULL_HANDLE)
179 vkDestroyQueryPool(vk_device, timestamp_query_pool, nullptr);
180 if (command_pool != VK_NULL_HANDLE)
181 vkDestroyCommandPool(vk_device, command_pool, nullptr);
182
183 // Only shutdown device if we own it (shared device managed by singleton)
184 if (owns_device) {
185 device->shutdown();
186 }
187 }
188
// NOTE(review): the signature line of this function is missing from this listing; the
// error messages below identify it as VulkanComputeBackend::updateGeometry, and it reads
// its input through a parameter named `geometry`.
//
// Uploads all geometry-derived data to GPU storage buffers:
//   - CWBVH nodes and BVH primitive indices (built on the CPU from `geometry`)
//   - per-primitive transforms, types, UUIDs, UUID->position lookup, subdivisions,
//     two-sided flags
//   - world-space patch/triangle vertices and world-space normals computed here
//   - texture mask data/sizes/offsets/IDs and UV data/IDs (placeholder buffers when empty)
//   - periodic-boundary bbox vertices plus the x/y domain bounds derived from them
// Each existing buffer is destroyed before its replacement is created, and
// descriptors_dirty is set at the end so the descriptor sets get refreshed.
// Size mismatches are reported through helios_runtime_error().
190 validateGeometryBeforeUpload(geometry);
191
192 primitive_count = geometry.primitive_count;
193
// NOTE(review): on this early return previously uploaded buffers are left in place and
// descriptors_dirty is not touched.
194 if (primitive_count == 0) {
195 return; // Empty geometry
196 }
197
198 // Build BVH2 on CPU, then convert to CWBVH (8-wide with quantized AABBs)
199 bvh_nodes = bvh_builder.build(geometry);
200 std::vector<CWBVH_Node> cwbvh_nodes = bvh_builder.convertToCWBVH(bvh_nodes);
201
202 // Upload CWBVH to GPU
203 if (!cwbvh_nodes.empty()) {
204 if (bvh_buffer.buffer != VK_NULL_HANDLE) {
205 destroyBuffer(bvh_buffer);
206 }
207 bvh_buffer = createBuffer(cwbvh_nodes.size() * sizeof(CWBVH_Node), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
208 uploadBufferData(bvh_buffer, cwbvh_nodes.data(), cwbvh_nodes.size() * sizeof(CWBVH_Node));
209 }
210
211 // Upload primitive indices
212 const auto &prim_indices = bvh_builder.getPrimitiveIndices();
213 if (!prim_indices.empty()) {
214 if (primitive_indices_buffer.buffer != VK_NULL_HANDLE) {
215 destroyBuffer(primitive_indices_buffer);
216 }
217 primitive_indices_buffer = createBuffer(prim_indices.size() * sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
218 uploadBufferData(primitive_indices_buffer, prim_indices.data(), prim_indices.size() * sizeof(uint32_t));
219 }
220
221 // Upload transform matrices
// 16 floats per primitive are expected; the size is validated only when the array is
// non-empty.
222 if (!geometry.transform_matrices.empty()) {
223 size_t expected_size = primitive_count * 16;
224 if (geometry.transform_matrices.size() != expected_size) {
225 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): transform_matrices size mismatch. Expected " + std::to_string(expected_size) + " floats (16 per primitive), got " +
226 std::to_string(geometry.transform_matrices.size()));
227 }
228
229 if (transform_matrices_buffer.buffer != VK_NULL_HANDLE) {
230 destroyBuffer(transform_matrices_buffer);
231 }
232 transform_matrices_buffer = createBuffer(geometry.transform_matrices.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
233 uploadBufferData(transform_matrices_buffer, geometry.transform_matrices.data(), geometry.transform_matrices.size() * sizeof(float));
234 }
235
236 // Upload primitive types
237 if (!geometry.primitive_types.empty()) {
238 if (geometry.primitive_types.size() != primitive_count) {
239 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): primitive_types size mismatch. Expected " + std::to_string(primitive_count) + " entries, got " + std::to_string(geometry.primitive_types.size()));
240 }
241
242 if (primitive_types_buffer.buffer != VK_NULL_HANDLE) {
243 destroyBuffer(primitive_types_buffer);
244 }
245 primitive_types_buffer = createBuffer(geometry.primitive_types.size() * sizeof(uint), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
246 uploadBufferData(primitive_types_buffer, geometry.primitive_types.data(), geometry.primitive_types.size() * sizeof(uint));
247 }
248
249 // Upload primitive UUIDs
250 if (!geometry.primitive_UUIDs.empty()) {
251 if (geometry.primitive_UUIDs.size() != primitive_count) {
252 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): primitive_UUIDs size mismatch. Expected " + std::to_string(primitive_count) + " entries, got " + std::to_string(geometry.primitive_UUIDs.size()));
253 }
254
255 if (primitive_uuids_buffer.buffer != VK_NULL_HANDLE) {
256 destroyBuffer(primitive_uuids_buffer);
257 }
258 primitive_uuids_buffer = createBuffer(geometry.primitive_UUIDs.size() * sizeof(uint), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
259 uploadBufferData(primitive_uuids_buffer, geometry.primitive_UUIDs.data(), geometry.primitive_UUIDs.size() * sizeof(uint));
260 }
261
262 // Upload UUID→position lookup
// Unlike the neighbouring arrays, no size check against primitive_count is done here.
263 if (!geometry.primitive_positions.empty()) {
264 if (primitive_positions_buffer.buffer != VK_NULL_HANDLE) {
265 destroyBuffer(primitive_positions_buffer);
266 }
267 primitive_positions_buffer = createBuffer(geometry.primitive_positions.size() * sizeof(uint), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
268 uploadBufferData(primitive_positions_buffer, geometry.primitive_positions.data(), geometry.primitive_positions.size() * sizeof(uint));
269 }
270
271 // Upload object subdivisions
272 if (!geometry.object_subdivisions.empty()) {
273 if (geometry.object_subdivisions.size() != primitive_count) {
274 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): object_subdivisions size mismatch. Expected " + std::to_string(primitive_count) + " entries, got " + std::to_string(geometry.object_subdivisions.size()));
275 }
276
277 if (object_subdivisions_buffer.buffer != VK_NULL_HANDLE) {
278 destroyBuffer(object_subdivisions_buffer);
279 }
280 object_subdivisions_buffer = createBuffer(geometry.object_subdivisions.size() * sizeof(helios::int2), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
281 uploadBufferData(object_subdivisions_buffer, geometry.object_subdivisions.data(), geometry.object_subdivisions.size() * sizeof(helios::int2));
282 }
283
284 // Upload twosided flags
285 if (!geometry.twosided_flags.empty()) {
286 if (geometry.twosided_flags.size() != primitive_count) {
287 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): twosided_flags size mismatch. Expected " + std::to_string(primitive_count) + " entries, got " + std::to_string(geometry.twosided_flags.size()));
288 }
289
290 if (twosided_flag_buffer.buffer != VK_NULL_HANDLE) {
291 destroyBuffer(twosided_flag_buffer);
292 }
293 // Convert char to uint for GPU (easier access)
294 std::vector<uint> twosided_uint(geometry.twosided_flags.begin(), geometry.twosided_flags.end());
295 twosided_flag_buffer = createBuffer(twosided_uint.size() * sizeof(uint), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
296 uploadBufferData(twosided_flag_buffer, twosided_uint.data(), twosided_uint.size() * sizeof(uint));
297 }
298
299 // Build pre-transformed (world-space) vertex buffers indexed by global primitive index.
300 // Vertices are transformed from canonical local space to world space using each primitive's
301 // transform matrix. This eliminates per-ray matrix loads and transform_point() calls in the
302 // BVH traversal shader inner loop — the single biggest GPU performance optimization.
// NOTE(review): the loop below indexes geometry.primitive_types[i] and
// geometry.transform_matrices[i * 16] for every primitive, although the size checks
// above run only when those arrays are non-empty. Presumably
// validateGeometryBeforeUpload() guarantees both are populated — confirm.
303 {
304 static const helios::vec3 canonical_quad[4] = {{-0.5f, -0.5f, 0.f}, {0.5f, -0.5f, 0.f}, {0.5f, 0.5f, 0.f}, {-0.5f, 0.5f, 0.f}};
305 static const helios::vec3 canonical_tri[3] = {{0.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {1.f, 1.f, 0.f}};
306
307 // Patch/tile vertex buffer: 4 vec3s per primitive slot, indexed by global prim_idx
308 std::vector<helios::vec3> patch_verts(primitive_count * 4, helios::make_vec3(0.f, 0.f, 0.f));
309 // Triangle vertex buffer: 3 vec3s per primitive slot, indexed by global prim_idx
310 std::vector<helios::vec3> tri_verts(primitive_count * 3, helios::make_vec3(0.f, 0.f, 0.f));
311
312 for (size_t i = 0; i < primitive_count; ++i) {
313 uint prim_type = geometry.primitive_types[i];
314 const float *transform = &geometry.transform_matrices[i * 16];
315
// The matrix is read row by row: elements 0-3, 4-7 and 8-11 produce x, y and z, with
// elements 3, 7 and 11 acting as the translation. Elements 12-15 are not used.
316 if (prim_type == 0 || prim_type == 3) { // Patch or Tile
317 for (int v = 0; v < 4; ++v) {
318 const helios::vec3 &p = canonical_quad[v];
319 helios::vec3 world_v;
320 world_v.x = transform[0] * p.x + transform[1] * p.y + transform[2] * p.z + transform[3];
321 world_v.y = transform[4] * p.x + transform[5] * p.y + transform[6] * p.z + transform[7];
322 world_v.z = transform[8] * p.x + transform[9] * p.y + transform[10] * p.z + transform[11];
323 patch_verts[i * 4 + v] = world_v;
324 }
325 } else if (prim_type == 1) { // Triangle
326 for (int v = 0; v < 3; ++v) {
327 const helios::vec3 &p = canonical_tri[v];
328 helios::vec3 world_v;
329 world_v.x = transform[0] * p.x + transform[1] * p.y + transform[2] * p.z + transform[3];
330 world_v.y = transform[4] * p.x + transform[5] * p.y + transform[6] * p.z + transform[7];
331 world_v.z = transform[8] * p.x + transform[9] * p.y + transform[10] * p.z + transform[11];
332 tri_verts[i * 3 + v] = world_v;
333 }
334 }
// Any other primitive type keeps the zero-initialized slots in both vertex arrays.
335 }
336
337 if (patch_vertices_buffer.buffer != VK_NULL_HANDLE) {
338 destroyBuffer(patch_vertices_buffer);
339 }
340 patch_vertices_buffer = createBuffer(patch_verts.size() * sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
341 uploadBufferData(patch_vertices_buffer, patch_verts.data(), patch_verts.size() * sizeof(helios::vec3));
342
343 if (triangle_vertices_buffer.buffer != VK_NULL_HANDLE) {
344 destroyBuffer(triangle_vertices_buffer);
345 }
346 triangle_vertices_buffer = createBuffer(tri_verts.size() * sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
347 uploadBufferData(triangle_vertices_buffer, tri_verts.data(), tri_verts.size() * sizeof(helios::vec3));
348 }
349
350 // Pre-compute world-space normals for each primitive.
351 // Eliminates per-thread get_patch_normal() / get_triangle_normal() (3 transform_point + cross product)
352 // in both direct and diffuse shaders, and also eliminates the per-hit normal computation in diffuse.
353 {
354 static const helios::vec3 patch_v0 = {0.f, 0.f, 0.f};
355 static const helios::vec3 patch_v1 = {1.f, 0.f, 0.f};
356 static const helios::vec3 patch_v2 = {0.f, 1.f, 0.f};
357 static const helios::vec3 tri_v0 = {0.f, 0.f, 0.f};
358 static const helios::vec3 tri_v1 = {0.f, 1.f, 0.f};
359 static const helios::vec3 tri_v2 = {1.f, 1.f, 0.f};
360
361 std::vector<helios::vec3> normals(primitive_count);
362
363 for (size_t i = 0; i < primitive_count; ++i) {
364 const float *t = &geometry.transform_matrices[i * 16];
365 uint prim_type = geometry.primitive_types[i];
366
367 // Choose canonical vertices based on primitive type
368 helios::vec3 cv0, cv1, cv2;
369 if (prim_type == 0 || prim_type == 2 || prim_type == 3) { // Patch, Disk, or Tile
370 cv0 = patch_v0;
371 cv1 = patch_v1;
372 cv2 = patch_v2;
373 } else if (prim_type == 1) { // Triangle
374 cv0 = tri_v0;
375 cv1 = tri_v1;
376 cv2 = tri_v2;
377 } else {
// Unrecognized primitive type: fall back to a +z normal.
378 normals[i] = helios::make_vec3(0.f, 0.f, 1.f);
379 continue;
380 }
381
382 // Transform canonical vertices to world space
383 helios::vec3 w0, w1, w2;
384 w0.x = t[0] * cv0.x + t[1] * cv0.y + t[2] * cv0.z + t[3];
385 w0.y = t[4] * cv0.x + t[5] * cv0.y + t[6] * cv0.z + t[7];
386 w0.z = t[8] * cv0.x + t[9] * cv0.y + t[10] * cv0.z + t[11];
387 w1.x = t[0] * cv1.x + t[1] * cv1.y + t[2] * cv1.z + t[3];
388 w1.y = t[4] * cv1.x + t[5] * cv1.y + t[6] * cv1.z + t[7];
389 w1.z = t[8] * cv1.x + t[9] * cv1.y + t[10] * cv1.z + t[11];
390 w2.x = t[0] * cv2.x + t[1] * cv2.y + t[2] * cv2.z + t[3];
391 w2.y = t[4] * cv2.x + t[5] * cv2.y + t[6] * cv2.z + t[7];
392 w2.z = t[8] * cv2.x + t[9] * cv2.y + t[10] * cv2.z + t[11];
393
394 // cross(w1 - w0, w2 - w0) then normalize
395 helios::vec3 e1 = w1 - w0;
396 helios::vec3 e2 = w2 - w0;
397 helios::vec3 n = cross(e1, e2);
398 float len = std::sqrt(n.x * n.x + n.y * n.y + n.z * n.z);
// Degenerate primitives (near-zero cross product) also get the +z fallback normal.
399 if (len > 1e-8f) {
400 normals[i] = n / len;
401 } else {
402 normals[i] = helios::make_vec3(0.f, 0.f, 1.f);
403 }
404 }
405
406 if (normal_buffer.buffer != VK_NULL_HANDLE) {
407 destroyBuffer(normal_buffer);
408 }
409 normal_buffer = createBuffer(normals.size() * sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
410 uploadBufferData(normal_buffer, normals.data(), normals.size() * sizeof(helios::vec3));
411 }
412
413 // Upload texture mask and UV data
414 {
415 // Convert mask_data from vector<bool> to flat uint32_t array (0 or 1 per texel)
// mask_offsets[k] is the index of the first texel of mask k in the flat array
// (a running sum of width * height over geometry.mask_sizes).
416 std::vector<uint32_t> mask_offsets;
417 uint32_t current_offset = 0;
418 for (const auto &sz: geometry.mask_sizes) {
419 mask_offsets.push_back(current_offset);
420 current_offset += static_cast<uint32_t>(sz.x) * static_cast<uint32_t>(sz.y);
421 }
422
423 std::vector<uint32_t> mask_data_uint;
424 if (current_offset > 0) {
425 if (geometry.mask_data.size() < current_offset) {
426 helios_runtime_error("ERROR (VulkanComputeBackend::updateGeometry): mask_data size mismatch. Expected " + std::to_string(current_offset) + " texels, got " + std::to_string(geometry.mask_data.size()));
427 }
428 mask_data_uint.resize(current_offset, 0);
429 for (size_t i = 0; i < current_offset; ++i) {
430 mask_data_uint[i] = geometry.mask_data[i] ? 1 : 0;
431 }
432 }
433
434 // Reformat UV data: allocate 4 vec2 per primitive (flat array)
// NOTE(review): geometry.uv_IDs[p] is read for every p < primitive_count whenever
// uv_IDs is non-empty; its size is not checked against primitive_count in this
// function — presumably validated upstream, confirm.
// uv_data is consumed sequentially: each primitive with uv_IDs[p] >= 0 takes the next
// (up to) 4 entries, regardless of the ID value itself.
435 std::vector<helios::vec2> uv_flat(primitive_count * 4, helios::make_vec2(0.f, 0.f));
436 size_t uv_read_offset = 0;
437 for (size_t p = 0; p < primitive_count; ++p) {
438 if (!geometry.uv_IDs.empty() && geometry.uv_IDs[p] >= 0) {
439 // This primitive has custom UVs - read next 4 from uv_data
440 for (int v = 0; v < 4 && uv_read_offset < geometry.uv_data.size(); ++v) {
441 uv_flat[p * 4 + v] = geometry.uv_data[uv_read_offset++];
442 }
443 }
444 // If uv_IDs[p] == -1, keep default zeros (unused, will use parametric UVs in shader)
445 }
446
447
448 // Upload mask_data_buffer
// From here on, every mask/UV buffer is always re-created: with real data when
// available, otherwise as a minimal placeholder that is created but not written to.
449 if (mask_data_buffer.buffer != VK_NULL_HANDLE) {
450 destroyBuffer(mask_data_buffer);
451 }
452 if (!mask_data_uint.empty()) {
453 mask_data_buffer = createBuffer(mask_data_uint.size() * sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
454 uploadBufferData(mask_data_buffer, mask_data_uint.data(), mask_data_uint.size() * sizeof(uint32_t));
455 } else {
456 // Keep placeholder buffer if no mask data
457 mask_data_buffer = createBuffer(sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
458 }
459
460 // Upload mask_sizes_buffer
461 if (mask_sizes_buffer.buffer != VK_NULL_HANDLE) {
462 destroyBuffer(mask_sizes_buffer);
463 }
464 if (!geometry.mask_sizes.empty()) {
465 mask_sizes_buffer = createBuffer(geometry.mask_sizes.size() * sizeof(helios::int2), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
466 uploadBufferData(mask_sizes_buffer, geometry.mask_sizes.data(), geometry.mask_sizes.size() * sizeof(helios::int2));
467 } else {
468 mask_sizes_buffer = createBuffer(sizeof(int32_t) * 2, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
469 }
470
471 // Upload mask_offsets_buffer
472 if (mask_offsets_buffer.buffer != VK_NULL_HANDLE) {
473 destroyBuffer(mask_offsets_buffer);
474 }
475 if (!mask_offsets.empty()) {
476 mask_offsets_buffer = createBuffer(mask_offsets.size() * sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
477 uploadBufferData(mask_offsets_buffer, mask_offsets.data(), mask_offsets.size() * sizeof(uint32_t));
478 } else {
479 mask_offsets_buffer = createBuffer(sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
480 }
481
482 // Upload mask_IDs_buffer
483 if (mask_IDs_buffer.buffer != VK_NULL_HANDLE) {
484 destroyBuffer(mask_IDs_buffer);
485 }
486 if (!geometry.mask_IDs.empty()) {
487 // Convert to int32_t
488 std::vector<int32_t> mask_IDs_int32(geometry.mask_IDs.begin(), geometry.mask_IDs.end());
489 mask_IDs_buffer = createBuffer(mask_IDs_int32.size() * sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
490 uploadBufferData(mask_IDs_buffer, mask_IDs_int32.data(), mask_IDs_int32.size() * sizeof(int32_t));
491 } else {
492 mask_IDs_buffer = createBuffer(sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
493 }
494
495 // Upload uv_data_buffer
496 if (uv_data_buffer.buffer != VK_NULL_HANDLE) {
497 destroyBuffer(uv_data_buffer);
498 }
499 if (!uv_flat.empty()) {
500 uv_data_buffer = createBuffer(uv_flat.size() * sizeof(helios::vec2), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
501 uploadBufferData(uv_data_buffer, uv_flat.data(), uv_flat.size() * sizeof(helios::vec2));
502 } else {
503 uv_data_buffer = createBuffer(sizeof(float) * 2, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
504 }
505
506 // Upload uv_IDs_buffer
507 if (uv_IDs_buffer.buffer != VK_NULL_HANDLE) {
508 destroyBuffer(uv_IDs_buffer);
509 }
510 if (!geometry.uv_IDs.empty()) {
511 // Convert to int32_t
512 std::vector<int32_t> uv_IDs_int32(geometry.uv_IDs.begin(), geometry.uv_IDs.end());
513 uv_IDs_buffer = createBuffer(uv_IDs_int32.size() * sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
514 uploadBufferData(uv_IDs_buffer, uv_IDs_int32.data(), uv_IDs_int32.size() * sizeof(int32_t));
515 } else {
516 uv_IDs_buffer = createBuffer(sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
517 }
518 }
519
520 // Upload periodic boundary bbox data
521 {
522 bbox_count = geometry.bbox_count;
523 periodic_flag_x = geometry.periodic_flag.x;
524 periodic_flag_y = geometry.periodic_flag.y;
525
526 if (bbox_vertices_buffer.buffer != VK_NULL_HANDLE) {
527 destroyBuffer(bbox_vertices_buffer);
528 }
529
// NOTE(review): the loops below index geometry.bboxes.vertices up to bbox_count * 4;
// only non-emptiness is checked here, not that the vector holds that many entries.
530 if (bbox_count > 0 && !geometry.bboxes.vertices.empty()) {
531 // Flatten vec3 vertices to float array (4 vertices per face, 3 floats each = 12 floats per face)
532 std::vector<float> bbox_verts_flat;
533 bbox_verts_flat.reserve(bbox_count * 12);
534 for (size_t i = 0; i < bbox_count * 4; ++i) {
535 bbox_verts_flat.push_back(geometry.bboxes.vertices[i].x);
536 bbox_verts_flat.push_back(geometry.bboxes.vertices[i].y);
537 bbox_verts_flat.push_back(geometry.bboxes.vertices[i].z);
538 }
539 bbox_vertices_buffer = createBuffer(bbox_verts_flat.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
540 uploadBufferData(bbox_vertices_buffer, bbox_verts_flat.data(), bbox_verts_flat.size() * sizeof(float));
541
542 // Compute domain bounds from all bbox vertices (min/max across all faces)
// Only x and y extents are tracked; domain_bounds = {xmin, xmax, ymin, ymax}.
543 float xmin = 1e30f, xmax = -1e30f, ymin = 1e30f, ymax = -1e30f;
544 for (size_t i = 0; i < bbox_count * 4; ++i) {
545 xmin = std::min(xmin, geometry.bboxes.vertices[i].x);
546 xmax = std::max(xmax, geometry.bboxes.vertices[i].x);
547 ymin = std::min(ymin, geometry.bboxes.vertices[i].y);
548 ymax = std::max(ymax, geometry.bboxes.vertices[i].y);
549 }
550 domain_bounds[0] = xmin;
551 domain_bounds[1] = xmax;
552 domain_bounds[2] = ymin;
553 domain_bounds[3] = ymax;
554 } else {
555 // No bboxes - create placeholder buffer
556 bbox_vertices_buffer = createBuffer(sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
557 domain_bounds[0] = domain_bounds[1] = domain_bounds[2] = domain_bounds[3] = 0.f;
558 }
559 }
560
561 descriptors_dirty = true; // Geometry changed, need descriptor update
562 }
563
// NOTE(review): the signature line of this function is missing from this listing, so
// its name and parameters cannot be confirmed from here. The body is intentionally empty.
565 // No-op: BVH is built in updateGeometry()
566 }
567
// NOTE(review): the signature line of this function is missing from this listing; the
// error messages below identify it as VulkanComputeBackend::updateMaterials, and it
// reads its input through a parameter named `materials`.
//
// Uploads material properties to GPU storage buffers: reflectivity and transmissivity
// (one value per source/band/primitive) plus specular exponent and specular scale
// (one value per primitive). An array that is empty is skipped and its existing
// buffer, if any, is left untouched. Size mismatches are reported through
// helios_runtime_error().
569 band_count = materials.num_bands;
570
// band_count is updated even when this early return is taken; descriptors_dirty is not.
571 if (primitive_count == 0) {
572 return; // No geometry uploaded yet
573 }
574
575 // Material buffers are indexed as [source * Nbands * Nprims + band * Nprims + prim]
576 // Use materials.num_sources for validation (source_count may not be set yet if updateSources hasn't been called)
577 size_t expected_size = materials.num_sources * band_count * primitive_count;
578
579 // Upload reflectivity buffer
580 if (!materials.reflectivity.empty()) {
581 if (materials.reflectivity.size() != expected_size) {
582 helios_runtime_error("ERROR (VulkanComputeBackend::updateMaterials): reflectivity size mismatch. Expected " + std::to_string(expected_size) + " entries (Nsources * Nprims * Nbands), got " +
583 std::to_string(materials.reflectivity.size()));
584 }
585
586 if (reflectivity_buffer.buffer != VK_NULL_HANDLE) {
587 destroyBuffer(reflectivity_buffer);
588 }
589 reflectivity_buffer = createBuffer(materials.reflectivity.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
590 uploadBufferData(reflectivity_buffer, materials.reflectivity.data(), materials.reflectivity.size() * sizeof(float));
591 }
592
593 // Upload transmissivity buffer
594 if (!materials.transmissivity.empty()) {
595 if (materials.transmissivity.size() != expected_size) {
596 helios_runtime_error("ERROR (VulkanComputeBackend::updateMaterials): transmissivity size mismatch. Expected " + std::to_string(expected_size) + " entries (Nsources * Nprims * Nbands), got " +
597 std::to_string(materials.transmissivity.size()));
598 }
599
600 if (transmissivity_buffer.buffer != VK_NULL_HANDLE) {
601 destroyBuffer(transmissivity_buffer);
602 }
603 transmissivity_buffer = createBuffer(materials.transmissivity.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
604 uploadBufferData(transmissivity_buffer, materials.transmissivity.data(), materials.transmissivity.size() * sizeof(float));
605 }
606
607 // Upload specular exponent buffer (per primitive)
608 if (!materials.specular_exponent.empty()) {
609 if (materials.specular_exponent.size() != primitive_count) {
610 helios_runtime_error("ERROR (VulkanComputeBackend::updateMaterials): specular_exponent size mismatch. Expected " + std::to_string(primitive_count) + " entries (Nprims), got " + std::to_string(materials.specular_exponent.size()));
611 }
612
613 if (specular_exponent_buffer.buffer != VK_NULL_HANDLE) {
614 destroyBuffer(specular_exponent_buffer);
615 }
616 specular_exponent_buffer = createBuffer(materials.specular_exponent.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
617 uploadBufferData(specular_exponent_buffer, materials.specular_exponent.data(), materials.specular_exponent.size() * sizeof(float));
618 }
619
620 // Upload specular scale buffer (per primitive)
621 if (!materials.specular_scale.empty()) {
622 if (materials.specular_scale.size() != primitive_count) {
623 helios_runtime_error("ERROR (VulkanComputeBackend::updateMaterials): specular_scale size mismatch. Expected " + std::to_string(primitive_count) + " entries (Nprims), got " + std::to_string(materials.specular_scale.size()));
624 }
625
626 if (specular_scale_buffer.buffer != VK_NULL_HANDLE) {
627 destroyBuffer(specular_scale_buffer);
628 }
629 specular_scale_buffer = createBuffer(materials.specular_scale.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
630 uploadBufferData(specular_scale_buffer, materials.specular_scale.data(), materials.specular_scale.size() * sizeof(float));
631 }
632
633 descriptors_dirty = true; // Materials changed, need descriptor update
634 }
635
636 void VulkanComputeBackend::updateSources(const std::vector<RayTracingSource> &sources) {
637 source_count = sources.size();
638
639 if (source_count == 0) {
640 return; // No sources
641 }
642
643 // Extract source data from structs
644 std::vector<helios::vec3> positions;
645 std::vector<uint> types;
646 std::vector<helios::vec3> rotations;
647 std::vector<helios::vec2> widths;
648
649 positions.reserve(source_count);
650 types.reserve(source_count);
651 rotations.reserve(source_count);
652 widths.reserve(source_count);
653
654 for (const auto &source: sources) {
655 positions.push_back(source.position);
656 types.push_back(source.type);
657 rotations.push_back(source.rotation);
658 widths.push_back(source.width);
659 }
660
661 // Upload source positions
662 if (source_positions_buffer.buffer != VK_NULL_HANDLE) {
663 destroyBuffer(source_positions_buffer);
664 }
665 source_positions_buffer = createBuffer(positions.size() * sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
666 uploadBufferData(source_positions_buffer, positions.data(), positions.size() * sizeof(helios::vec3));
667
668 // Upload source types
669 if (source_types_buffer.buffer != VK_NULL_HANDLE) {
670 destroyBuffer(source_types_buffer);
671 }
672 source_types_buffer = createBuffer(types.size() * sizeof(uint), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
673 uploadBufferData(source_types_buffer, types.data(), types.size() * sizeof(uint));
674
675 // Upload source rotations
676 if (source_rotations_buffer.buffer != VK_NULL_HANDLE) {
677 destroyBuffer(source_rotations_buffer);
678 }
679 source_rotations_buffer = createBuffer(rotations.size() * sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
680 uploadBufferData(source_rotations_buffer, rotations.data(), rotations.size() * sizeof(helios::vec3));
681
682 // Upload source widths
683 if (source_widths_buffer.buffer != VK_NULL_HANDLE) {
684 destroyBuffer(source_widths_buffer);
685 }
686 source_widths_buffer = createBuffer(widths.size() * sizeof(helios::vec2), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
687 uploadBufferData(source_widths_buffer, widths.data(), widths.size() * sizeof(helios::vec2));
688 }
689
690 void VulkanComputeBackend::updateDiffuseRadiation(const std::vector<float> &flux, const std::vector<float> &extinction, const std::vector<helios::vec3> &peak_dir, const std::vector<float> &dist_norm, const std::vector<float> &sky_energy) {
691 // Intentional no-op: the Vulkan backend uploads diffuse radiation parameters directly
692 // in launchDiffuseRays() from the RayTracingLaunchParams struct, rather than caching
693 // them here. This method is required by the RayTracingBackend interface but is not
694 // called by RadiationModel.
695 }
696
697 void VulkanComputeBackend::updateSkyModel(const std::vector<helios::vec4> &sky_radiance_params, const std::vector<float> &camera_sky_radiance, const helios::vec3 &sun_direction, const std::vector<float> &solar_disk_radiance,
698 float solar_disk_cos_angle) {
699 // Store sun parameters for push constants
700 cached_sun_direction = sun_direction;
701 cached_solar_disk_cos_angle = solar_disk_cos_angle;
702
703 // Upload sky_radiance_params to existing buffer (already used by diffuse shader)
704 if (!sky_radiance_params.empty()) {
705 size_t params_size = sky_radiance_params.size() * sizeof(helios::vec4);
706 if (sky_radiance_params_buffer.buffer == VK_NULL_HANDLE || sky_radiance_params_buffer.size != params_size) {
707 if (sky_radiance_params_buffer.buffer != VK_NULL_HANDLE) {
708 destroyBuffer(sky_radiance_params_buffer);
709 }
710 sky_radiance_params_buffer = createBuffer(params_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
711 descriptors_dirty = true;
712 }
713 uploadBufferData(sky_radiance_params_buffer, sky_radiance_params.data(), params_size);
714 }
715
716 // Upload camera_sky_radiance (zenith sky radiance for camera miss shader)
717 if (!camera_sky_radiance.empty()) {
718 size_t sky_size = camera_sky_radiance.size() * sizeof(float);
719 if (camera_sky_radiance_buffer.buffer == VK_NULL_HANDLE || camera_sky_radiance_buffer.size != sky_size) {
720 if (camera_sky_radiance_buffer.buffer != VK_NULL_HANDLE) {
721 destroyBuffer(camera_sky_radiance_buffer);
722 }
723 camera_sky_radiance_buffer = createBuffer(sky_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
724 descriptors_dirty = true;
725 }
726 uploadBufferData(camera_sky_radiance_buffer, camera_sky_radiance.data(), sky_size);
727 }
728
729 // Upload solar_disk_radiance (solar disk radiance for camera miss shader)
730 if (!solar_disk_radiance.empty()) {
731 size_t solar_size = solar_disk_radiance.size() * sizeof(float);
732 if (solar_disk_radiance_buffer.buffer == VK_NULL_HANDLE || solar_disk_radiance_buffer.size != solar_size) {
733 if (solar_disk_radiance_buffer.buffer != VK_NULL_HANDLE) {
734 destroyBuffer(solar_disk_radiance_buffer);
735 }
736 solar_disk_radiance_buffer = createBuffer(solar_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
737 descriptors_dirty = true;
738 }
739 uploadBufferData(solar_disk_radiance_buffer, solar_disk_radiance.data(), solar_size);
740 }
741 }
742
// Body of VulkanComputeBackend::launchDirectRays (name taken from the error messages near the end;
// the signature line is not part of this listing). Using the settings in `params`, it rebuilds the
// launch-band -> global-band map, makes sure the scatter and radiation_out buffers hold
// primitive_count * launch_band_count floats, records a single dispatch of pipeline_direct into
// compute_command_buffer, submits it to the compute queue and waits on compute_fence.
// Returns without doing anything when there are no primitives or no sources.
744 if (primitive_count == 0 || source_count == 0) {
745 return; // No geometry or sources
746 }
747
748 // Build band mapping (same logic as diffuse)
// launch_to_global_band[i] ends up holding the global index g of the i-th band whose flag is set.
// NOTE(review): band_launch_flag is indexed up to band_count - 1 whenever it is non-empty, and its
// length is not checked here - confirm callers always size it to band_count.
749 launch_to_global_band.clear();
750 for (uint32_t g = 0; g < band_count; g++) {
751 if (!params.band_launch_flag.empty() && params.band_launch_flag[g]) {
752 launch_to_global_band.push_back(g);
753 }
754 }
// Nothing flagged (or no flags supplied): fall back to the identity map 0 .. launch_band_count - 1.
755 if (launch_to_global_band.empty()) {
756 for (uint32_t g = 0; g < launch_band_count; g++) {
757 launch_to_global_band.push_back(g);
758 }
759 }
760
761 // Upload band mapping to GPU buffer
762 uploadBufferData(band_map_buffer, launch_to_global_band.data(), launch_to_global_band.size() * sizeof(uint32_t));
763
764 // Ensure scatter and radiation_out buffers exist (direct shader writes scatter buffers)
// All four buffers below share one byte size; each is recreated when missing or when its recorded
// size differs, and every recreation marks the descriptor sets dirty.
765 size_t buf_size = primitive_count * launch_band_count * sizeof(float);
766 VkBufferUsageFlags scatter_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
767
768 if (scatter_top_buffer.buffer == VK_NULL_HANDLE || scatter_top_buffer.size != buf_size) {
769 if (scatter_top_buffer.buffer != VK_NULL_HANDLE) {
770 destroyBuffer(scatter_top_buffer);
771 }
772 scatter_top_buffer = createBuffer(buf_size, scatter_usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
773 descriptors_dirty = true;
774 }
775 if (scatter_bottom_buffer.buffer == VK_NULL_HANDLE || scatter_bottom_buffer.size != buf_size) {
776 if (scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
777 destroyBuffer(scatter_bottom_buffer);
778 }
779 scatter_bottom_buffer = createBuffer(buf_size, scatter_usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
780 descriptors_dirty = true;
781 }
782
783 // Ensure radiation_out buffers exist (needed for descriptor set completeness)
// Unlike the scatter buffers, these are zero-filled only at the moment they are (re)created here.
784 VkBufferUsageFlags rad_out_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
785 if (radiation_out_top_buffer.buffer == VK_NULL_HANDLE || radiation_out_top_buffer.size != buf_size) {
786 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
787 destroyBuffer(radiation_out_top_buffer);
788 }
789 radiation_out_top_buffer = createBuffer(buf_size, rad_out_usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
790 zeroBuffer(radiation_out_top_buffer);
791 descriptors_dirty = true;
792 }
793 if (radiation_out_bottom_buffer.buffer == VK_NULL_HANDLE || radiation_out_bottom_buffer.size != buf_size) {
794 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
795 destroyBuffer(radiation_out_bottom_buffer);
796 }
797 radiation_out_bottom_buffer = createBuffer(buf_size, rad_out_usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
798 zeroBuffer(radiation_out_bottom_buffer);
799 descriptors_dirty = true;
800 }
801
802 // Zero scatter buffers before each direct launch (runBand does not zero them before direct)
803 zeroBuffer(scatter_top_buffer);
804 zeroBuffer(scatter_bottom_buffer);
805
806 // Update descriptor sets only if buffers changed
807 if (descriptors_dirty) {
808 updateDescriptorSets();
809 descriptors_dirty = false;
810 }
811 VkDevice vk_device = device->getDevice();
812
813 // Record COMPUTE command buffer
// NOTE(review): the VkResult returned by vkBeginCommandBuffer (and by vkEndCommandBuffer further
// down) is not inspected; only the submit and the fence wait are checked.
814 VkCommandBufferBeginInfo begin_info{};
815 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
816 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
817 vkBeginCommandBuffer(compute_command_buffer, &begin_info);
818
819 // Bind pipeline
820 vkCmdBindPipeline(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_direct);
821
822 // Bind descriptor sets
// Only three sets (geometry, materials, results) are bound for the direct pipeline.
823 VkDescriptorSet sets[] = {set_geometry, set_materials, set_results};
824 vkCmdBindDescriptorSets(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 3, sets, 0, nullptr);
825
826 // Push constants (expanded for 3D dispatch with 2D primitive tiling)
// The struct is pushed as raw bytes (sizeof(PushConstants)) and is declared without an initializer;
// every field is assigned individually before vkCmdPushConstants is recorded.
// NOTE(review): field order and sizes presumably mirror the push-constant block of the direct-ray
// shader - confirm before reordering. `uint` is not a standard C++ type name (presumably a project
// or platform typedef); the diffuse launch in this file declares the same kind of fields as uint32_t.
827 struct PushConstants {
828 uint launch_offset;
829 uint launch_count;
830 uint rays_per_primitive;
831 uint random_seed;
832 uint current_band;
833 uint band_count;
834 uint source_count;
835 uint primitive_count;
836 uint debug_mode; // 1 = enable bounds checking, 0 = production
837 uint launch_dim_x; // Grid dimension X for stratified sampling
838 uint launch_dim_y; // Grid dimension Y for stratified sampling
839 uint prim_tiles_y; // Number of primitive tiles in Y dimension
840 uint prims_per_tile; // Primitives per tile (65535 max)
841 uint material_band_count; // Global band count (for material buffers)
842 uint periodic_flag_x; // 1 if periodic in X direction
843 uint periodic_flag_y; // 1 if periodic in Y direction
844 uint bbox_count; // Number of bbox faces (0-4)
845 float domain_xmin; // Domain bounds for periodic wrapping
846 float domain_xmax;
847 float domain_ymin;
848 float domain_ymax;
849 uint specular_reflection_enabled; // 0=disabled, 1=default scale, 2=user scale
850 } push_constants;
851
852 // Compute 2D grid dimensions for stratified sampling (matches OptiX)
// Square grid with side ceil(sqrt(rays_per_primitive)), so the grid holds at least that many cells.
853 uint32_t launch_dim_x = static_cast<uint32_t>(std::ceil(std::sqrt(static_cast<double>(params.rays_per_primitive))));
854 uint32_t launch_dim_y = launch_dim_x;
855
856 push_constants.launch_offset = params.launch_offset;
857 push_constants.launch_count = params.launch_count;
858 push_constants.rays_per_primitive = params.rays_per_primitive;
859 push_constants.random_seed = params.random_seed;
860 push_constants.current_band = params.current_band;
861 push_constants.band_count = launch_band_count; // Use launch band count (not global)
862 push_constants.source_count = source_count;
863 push_constants.primitive_count = primitive_count;
864 push_constants.launch_dim_x = launch_dim_x;
865 push_constants.launch_dim_y = launch_dim_y;
866 push_constants.material_band_count = band_count; // Global band count for material indexing
867
868// Enable debug bounds checking (can be disabled in production builds)
869#ifdef HELIOS_DEBUG
870 push_constants.debug_mode = 1;
871#else
872 push_constants.debug_mode = 0;
873#endif
874
875 // Periodic boundary parameters
// domain_bounds is read in the order [0]=xmin, [1]=xmax, [2]=ymin, [3]=ymax.
876 push_constants.periodic_flag_x = static_cast<uint32_t>(periodic_flag_x);
877 push_constants.periodic_flag_y = static_cast<uint32_t>(periodic_flag_y);
878 push_constants.bbox_count = bbox_count;
879 push_constants.domain_xmin = domain_bounds[0];
880 push_constants.domain_xmax = domain_bounds[1];
881 push_constants.domain_ymin = domain_bounds[2];
882 push_constants.domain_ymax = domain_bounds[3];
883 push_constants.specular_reflection_enabled = params.specular_reflection_enabled;
884
885 // 3D dispatch with 2D primitive tiling to avoid sub-batching
886 // Tile primitives into Y dimension when count exceeds 65535 to use full Vulkan dispatch space
887 const uint32_t WG_X = 8; // Must match shader local_size_x
888 const uint32_t WG_Y = 32; // Must match shader local_size_y
889 const uint32_t MAX_PRIMS_PER_TILE = 65535;
890
// Workgroup counts: X covers launch_dim_x in groups of WG_X, Y stacks prim_tiles_y copies of the
// dispatch_y_rays groups needed for launch_dim_y, and Z is the number of primitives in one tile.
891 uint32_t dispatch_x = (launch_dim_x + WG_X - 1) / WG_X;
892 uint32_t dispatch_y_rays = (launch_dim_y + WG_Y - 1) / WG_Y;
893
894 // Compute primitive tiling
895 uint32_t prims_per_tile = std::min(params.launch_count, MAX_PRIMS_PER_TILE);
896 uint32_t prim_tiles_y = (params.launch_count + MAX_PRIMS_PER_TILE - 1) / MAX_PRIMS_PER_TILE;
897
898 uint32_t dispatch_y = dispatch_y_rays * prim_tiles_y;
899 uint32_t dispatch_z = prims_per_tile;
900
901 push_constants.prim_tiles_y = prim_tiles_y;
902 push_constants.prims_per_tile = prims_per_tile;
903
904 vkCmdPushConstants(compute_command_buffer, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), &push_constants);
905 vkCmdDispatch(compute_command_buffer, dispatch_x, dispatch_y, dispatch_z);
906
907 // Buffer memory barrier to ensure storage buffer writes are visible for readback
908 // CRITICAL: Use buffer-specific barrier instead of global barrier for MoltenVK compatibility
// The three entries are identical apart from the buffer they name: shader writes are made available
// to later shader reads and transfer reads, over the whole buffer, with no queue-family transfer.
909 VkBufferMemoryBarrier buffer_barriers[3];
910
911 // Radiation_in buffer barrier
912 buffer_barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
913 buffer_barriers[0].pNext = nullptr;
914 buffer_barriers[0].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
915 buffer_barriers[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
916 buffer_barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
917 buffer_barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
918 buffer_barriers[0].buffer = radiation_in_buffer.buffer;
919 buffer_barriers[0].offset = 0;
920 buffer_barriers[0].size = VK_WHOLE_SIZE;
921
922 // Scatter_top buffer barrier (direct shader writes scatter)
923 buffer_barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
924 buffer_barriers[1].pNext = nullptr;
925 buffer_barriers[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
926 buffer_barriers[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
927 buffer_barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
928 buffer_barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
929 buffer_barriers[1].buffer = scatter_top_buffer.buffer;
930 buffer_barriers[1].offset = 0;
931 buffer_barriers[1].size = VK_WHOLE_SIZE;
932
933 // Scatter_bottom buffer barrier
934 buffer_barriers[2].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
935 buffer_barriers[2].pNext = nullptr;
936 buffer_barriers[2].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
937 buffer_barriers[2].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
938 buffer_barriers[2].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
939 buffer_barriers[2].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
940 buffer_barriers[2].buffer = scatter_bottom_buffer.buffer;
941 buffer_barriers[2].offset = 0;
942 buffer_barriers[2].size = VK_WHOLE_SIZE;
943
944 vkCmdPipelineBarrier(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, // No global memory barriers
945 3, buffer_barriers, // Buffer-specific barriers
946 0, nullptr); // No image barriers
947
948 vkEndCommandBuffer(compute_command_buffer);
949
950 // Submit command buffer with COMPUTE fence
951 VkSubmitInfo submit_info{};
952 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
953 submit_info.commandBufferCount = 1;
954 submit_info.pCommandBuffers = &compute_command_buffer;
955
956 vkResetFences(vk_device, 1, &compute_fence);
957 VkResult result = vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, compute_fence);
958 if (result != VK_SUCCESS) {
959 helios_runtime_error("ERROR (VulkanComputeBackend::launchDirectRays): vkQueueSubmit failed. VkResult: " + std::to_string(result));
960 }
961
962 // Wait for compute to complete (no timeout - large scenes can take minutes)
// Each vkWaitForFences call is given 1000000000 ns (one second); VK_TIMEOUT just re-enters the
// loop, so the total wait is unbounded. Any result other than success/timeout is reported as an error.
963 VkResult wait_result;
964 do {
965 wait_result = vkWaitForFences(vk_device, 1, &compute_fence, VK_TRUE, 1000000000ULL);
966 if (wait_result != VK_SUCCESS && wait_result != VK_TIMEOUT) {
967 helios_runtime_error("ERROR (VulkanComputeBackend::launchDirectRays): vkWaitForFences failed. VkResult: " + std::to_string(wait_result));
968 }
969 } while (wait_result == VK_TIMEOUT);
970 }
971
// Body of VulkanComputeBackend::launchDiffuseRays (name taken from the error messages near the end;
// the signature line is not part of this listing). Using the settings in `params`, it makes sure the
// radiation_out, scatter, diffuse-sky, sky-radiance and debug-counter buffers exist, rebuilds the
// launch-band -> global-band map, records a single timestamped dispatch of pipeline_diffuse into
// compute_command_buffer, submits it to the compute queue and waits on compute_fence.
// Returns without doing anything when there are no primitives.
973 if (primitive_count == 0) {
974 return; // No geometry
975 }
976
977 // Ensure radiation_out_top/bottom buffers exist (required by shader)
// One byte size (primitive_count * launch_band_count floats) is shared by the radiation_out and
// scatter buffers below; each is zero-filled only when it is (re)created in this section.
978 size_t rad_out_size = primitive_count * launch_band_count * sizeof(float);
979 if (radiation_out_top_buffer.buffer == VK_NULL_HANDLE || radiation_out_top_buffer.size != rad_out_size) {
980 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
981 destroyBuffer(radiation_out_top_buffer);
982 }
983 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
984 radiation_out_top_buffer = createBuffer(rad_out_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
985 zeroBuffer(radiation_out_top_buffer);
986 descriptors_dirty = true;
987 }
988
989 if (radiation_out_bottom_buffer.buffer == VK_NULL_HANDLE || radiation_out_bottom_buffer.size != rad_out_size) {
990 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
991 destroyBuffer(radiation_out_bottom_buffer);
992 }
993 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
994 radiation_out_bottom_buffer = createBuffer(rad_out_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
995 zeroBuffer(radiation_out_bottom_buffer);
996 descriptors_dirty = true;
997 }
998
999 // Ensure scatter_top/bottom buffers exist (required by shader and barriers)
1000 if (scatter_top_buffer.buffer == VK_NULL_HANDLE || scatter_top_buffer.size != rad_out_size) {
1001 if (scatter_top_buffer.buffer != VK_NULL_HANDLE) {
1002 destroyBuffer(scatter_top_buffer);
1003 }
1004 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1005 scatter_top_buffer = createBuffer(rad_out_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1006 zeroBuffer(scatter_top_buffer);
1007 descriptors_dirty = true;
1008 }
1009 if (scatter_bottom_buffer.buffer == VK_NULL_HANDLE || scatter_bottom_buffer.size != rad_out_size) {
1010 if (scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
1011 destroyBuffer(scatter_bottom_buffer);
1012 }
1013 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1014 scatter_bottom_buffer = createBuffer(rad_out_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1015 zeroBuffer(scatter_bottom_buffer);
1016 descriptors_dirty = true;
1017 }
1018
1019 // Upload radiation_out_top and radiation_out_bottom if provided
1020 if (!params.radiation_out_top.empty() && !params.radiation_out_bottom.empty()) {
// NOTE(review): the statement that belongs inside this branch is absent from this listing (the
// numbering jumps from 1020 to 1022), so as shown the branch is empty - check the original source.
1022 }
1023
1024 // Upload diffuse sky parameters
// Each of the four vectors below is uploaded only when non-empty; its buffer is recreated when
// missing or when the recorded size differs from the vector's byte size.
// NOTE(review): only the diffuse_flux upload path requests TRANSFER_DST; peak_dir, extinction and
// dist_norm request STORAGE only although they are filled the same way - confirm whether
// createBuffer/uploadBufferData need the transfer bit.
1025 if (!params.diffuse_flux.empty()) {
1026 // Diffuse flux buffer
1027 size_t flux_size = params.diffuse_flux.size() * sizeof(float);
1028 if (diffuse_flux_buffer.buffer == VK_NULL_HANDLE || diffuse_flux_buffer.size != flux_size) {
1029 if (diffuse_flux_buffer.buffer != VK_NULL_HANDLE) {
1030 destroyBuffer(diffuse_flux_buffer);
1031 }
1032 diffuse_flux_buffer = createBuffer(flux_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1033 descriptors_dirty = true;
1034 }
1035 uploadBufferData(diffuse_flux_buffer, params.diffuse_flux.data(), flux_size);
1036 }
1037
1038 if (!params.diffuse_peak_dir.empty()) {
1039 // Diffuse peak direction buffer
1040 size_t peak_dir_size = params.diffuse_peak_dir.size() * sizeof(helios::vec3);
1041 if (diffuse_peak_dir_buffer.buffer == VK_NULL_HANDLE || diffuse_peak_dir_buffer.size != peak_dir_size) {
1042 if (diffuse_peak_dir_buffer.buffer != VK_NULL_HANDLE) {
1043 destroyBuffer(diffuse_peak_dir_buffer);
1044 }
1045 diffuse_peak_dir_buffer = createBuffer(peak_dir_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1046 descriptors_dirty = true;
1047 }
1048 uploadBufferData(diffuse_peak_dir_buffer, params.diffuse_peak_dir.data(), peak_dir_size);
1049 }
1050
1051 if (!params.diffuse_extinction.empty()) {
1052 // Diffuse extinction buffer
1053 size_t extinction_size = params.diffuse_extinction.size() * sizeof(float);
1054 if (diffuse_extinction_buffer.buffer == VK_NULL_HANDLE || diffuse_extinction_buffer.size != extinction_size) {
1055 if (diffuse_extinction_buffer.buffer != VK_NULL_HANDLE) {
1056 destroyBuffer(diffuse_extinction_buffer);
1057 }
1058 diffuse_extinction_buffer = createBuffer(extinction_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1059 descriptors_dirty = true;
1060 }
1061 uploadBufferData(diffuse_extinction_buffer, params.diffuse_extinction.data(), extinction_size);
1062 }
1063
1064 if (!params.diffuse_dist_norm.empty()) {
1065 // Diffuse distribution normalization buffer
1066 size_t dist_norm_size = params.diffuse_dist_norm.size() * sizeof(float);
1067 if (diffuse_dist_norm_buffer.buffer == VK_NULL_HANDLE || diffuse_dist_norm_buffer.size != dist_norm_size) {
1068 if (diffuse_dist_norm_buffer.buffer != VK_NULL_HANDLE) {
1069 destroyBuffer(diffuse_dist_norm_buffer);
1070 }
1071 diffuse_dist_norm_buffer = createBuffer(dist_norm_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1072 descriptors_dirty = true;
1073 }
1074 uploadBufferData(diffuse_dist_norm_buffer, params.diffuse_dist_norm.data(), dist_norm_size);
1075 }
1076
1077 // Ensure all sky parameter buffers exist (required by shader even if empty/zero)
1078 // Only create if null - don't resize (that's handled by upload sections above)
// These fallbacks are sized from band_count (the global count) and zero-filled, whereas the uploads
// above are sized from the vectors in params; they also use AUTO_PREFER_HOST instead of GPU_ONLY.
1079 if (diffuse_flux_buffer.buffer == VK_NULL_HANDLE) {
1080 size_t flux_size = band_count * sizeof(float);
1081 diffuse_flux_buffer = createBuffer(flux_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1082 zeroBuffer(diffuse_flux_buffer);
1083 descriptors_dirty = true;
1084 }
1085
1086 if (diffuse_peak_dir_buffer.buffer == VK_NULL_HANDLE) {
1087 size_t peak_dir_size = band_count * sizeof(helios::vec3);
1088 diffuse_peak_dir_buffer = createBuffer(peak_dir_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1089 zeroBuffer(diffuse_peak_dir_buffer);
1090 descriptors_dirty = true;
1091 }
1092
1093 if (diffuse_extinction_buffer.buffer == VK_NULL_HANDLE) {
1094 size_t extinction_size = band_count * sizeof(float);
1095 diffuse_extinction_buffer = createBuffer(extinction_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1096 zeroBuffer(diffuse_extinction_buffer);
1097 descriptors_dirty = true;
1098 }
1099
1100 if (diffuse_dist_norm_buffer.buffer == VK_NULL_HANDLE) {
1101 size_t dist_norm_size = band_count * sizeof(float);
1102 diffuse_dist_norm_buffer = createBuffer(dist_norm_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1103 zeroBuffer(diffuse_dist_norm_buffer);
1104 descriptors_dirty = true;
1105 }
1106
1107 // Sky radiance params (Prague model) - always recreate with current data
// The buffer is destroyed and recreated on every call and descriptors_dirty is set unconditionally,
// so the updateDescriptorSets() call further down runs on every launch that reaches this point.
// Data from params is used only when it has exactly launch_band_count entries; otherwise the new
// buffer is zero-filled.
1108 {
1109 size_t sky_params_size = launch_band_count * sizeof(helios::vec4);
1110 if (sky_radiance_params_buffer.buffer != VK_NULL_HANDLE) {
1111 destroyBuffer(sky_radiance_params_buffer);
1112 }
1113 sky_radiance_params_buffer = createBuffer(sky_params_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1114 descriptors_dirty = true;
1115 if (!params.sky_radiance_params.empty() && params.sky_radiance_params.size() == launch_band_count) {
1116 uploadBufferData(sky_radiance_params_buffer, params.sky_radiance_params.data(), sky_params_size);
1117 } else {
1118 zeroBuffer(sky_radiance_params_buffer);
1119 }
1120 }
1121
1122 // Create debug counters buffer if needed (5 uint32_t counters)
1123 // MUST be before descriptor update check
// Either way the counters are zeroed: once on creation, otherwise at the start of each launch.
1124 if (debug_counters_buffer.buffer == VK_NULL_HANDLE) {
1125 debug_counters_buffer = createBuffer(5 * sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1126 zeroBuffer(debug_counters_buffer);
1127 descriptors_dirty = true;
1128 } else {
1129 // Zero counters before each dispatch
1130 zeroBuffer(debug_counters_buffer);
1131 }
1132
1133 // Update descriptor sets only if buffers changed
1134 if (descriptors_dirty) {
1135 updateDescriptorSets();
1136 descriptors_dirty = false;
1137 }
1138 VkDevice vk_device = device->getDevice();
1139
1140 // Compute 2D grid dimensions for stratified hemisphere sampling (matches OptiX: ceil(sqrt(rays_per_primitive)))
1141 uint32_t launch_dim_x = static_cast<uint32_t>(std::ceil(std::sqrt(static_cast<double>(params.rays_per_primitive))));
1142 uint32_t launch_dim_y = launch_dim_x;
1143
1144 // Use the launch_face specified by the caller (RadiationModel already loops over faces)
1145 uint32_t launch_face = params.launch_face;
1146
1147 // Build band mapping: launch band index → global band index
// NOTE(review): band_launch_flag is indexed up to band_count - 1 whenever it is non-empty, and its
// length is not checked here - confirm callers always size it to band_count.
1148 launch_to_global_band.clear();
1149 for (uint32_t g = 0; g < band_count; g++) {
1150 if (!params.band_launch_flag.empty() && params.band_launch_flag[g]) {
1151 launch_to_global_band.push_back(g);
1152 }
1153 }
1154 if (launch_to_global_band.empty()) {
1155 // Fallback: identity mapping (all bands active)
1156 for (uint32_t g = 0; g < launch_band_count; g++) {
1157 launch_to_global_band.push_back(g);
1158 }
1159 }
1160
1161 // Upload band mapping to GPU buffer
1162 uploadBufferData(band_map_buffer, launch_to_global_band.data(), launch_to_global_band.size() * sizeof(uint32_t));
1163
1164 // Push constants struct (expanded for diffuse rays)
// The struct is pushed as raw bytes (sizeof(PushConstants)) and is declared without an initializer;
// every field is assigned individually before vkCmdPushConstants is recorded.
// NOTE(review): field order and sizes presumably mirror the push-constant block of the diffuse
// shader - confirm before reordering.
1165 struct PushConstants {
1166 uint32_t launch_offset;
1167 uint32_t launch_count;
1168 uint32_t rays_per_primitive;
1169 uint32_t random_seed;
1170 uint32_t band_count;
1171 uint32_t source_count;
1172 uint32_t primitive_count;
1173 uint32_t launch_face; // 0 = bottom, 1 = top
1174 uint32_t launch_dim_x; // Grid dimension X
1175 uint32_t launch_dim_y; // Grid dimension Y
1176 uint32_t material_band_count; // Global band count (for material buffers)
1177 uint32_t periodic_flag_x; // 1 if periodic in X direction
1178 uint32_t periodic_flag_y; // 1 if periodic in Y direction
1179 uint32_t bbox_count; // Number of bbox faces (0-4)
1180 float domain_xmin; // Domain bounds for periodic wrapping
1181 float domain_xmax;
1182 float domain_ymin;
1183 float domain_ymax;
1184 uint32_t prim_tiles_y; // Number of primitive tiles in Y dimension
1185 uint32_t prims_per_tile; // Primitives per tile (65535 max)
1186 } push_constants;
1187
1188 // Initialize invariant push constants
1189 push_constants.rays_per_primitive = params.rays_per_primitive;
1190 push_constants.random_seed = params.random_seed;
1191 push_constants.band_count = launch_band_count; // Use launch band count (not global)
1192 push_constants.material_band_count = band_count; // Global band count for material indexing
1193 push_constants.source_count = source_count;
1194 push_constants.primitive_count = primitive_count;
1195 push_constants.launch_face = launch_face;
1196 push_constants.launch_dim_x = launch_dim_x;
1197 push_constants.launch_dim_y = launch_dim_y;
1198
1199 // Periodic boundary parameters
// domain_bounds is read in the order [0]=xmin, [1]=xmax, [2]=ymin, [3]=ymax.
1200 push_constants.periodic_flag_x = static_cast<uint32_t>(periodic_flag_x);
1201 push_constants.periodic_flag_y = static_cast<uint32_t>(periodic_flag_y);
1202 push_constants.bbox_count = bbox_count;
1203 push_constants.domain_xmin = domain_bounds[0];
1204 push_constants.domain_xmax = domain_bounds[1];
1205 push_constants.domain_ymin = domain_bounds[2];
1206 push_constants.domain_ymax = domain_bounds[3];
1207
1208 // 3D dispatch with 2D primitive tiling to avoid exceeding Vulkan maxComputeWorkGroupCount[2] = 65535
1209 // Tile primitives into Y dimension when count exceeds 65535, matching the direct shader approach
1210 const uint32_t WG_X = 8; // Must match shader local_size_x
1211 const uint32_t WG_Y = 32; // Must match shader local_size_y
1212 const uint32_t MAX_PRIMS_PER_TILE = 65535;
1213
// Workgroup counts: X covers launch_dim_x in groups of WG_X, Y stacks prim_tiles_y copies of the
// dispatch_y_rays groups needed for launch_dim_y, and Z is the number of primitives in one tile.
1214 uint32_t dispatch_x = (launch_dim_x + WG_X - 1) / WG_X;
1215 uint32_t dispatch_y_rays = (launch_dim_y + WG_Y - 1) / WG_Y;
1216
1217 // Compute primitive tiling
1218 uint32_t prims_per_tile = std::min(params.launch_count, MAX_PRIMS_PER_TILE);
1219 uint32_t prim_tiles_y = (params.launch_count + MAX_PRIMS_PER_TILE - 1) / MAX_PRIMS_PER_TILE;
1220
1221 uint32_t dispatch_y = dispatch_y_rays * prim_tiles_y;
1222 uint32_t dispatch_z = prims_per_tile;
1223
1224 push_constants.launch_offset = params.launch_offset;
1225 push_constants.launch_count = params.launch_count;
1226 push_constants.prim_tiles_y = prim_tiles_y;
1227 push_constants.prims_per_tile = prims_per_tile;
1228
1229 {
1230 // Record COMPUTE command buffer
// NOTE(review): the VkResult returned by vkBeginCommandBuffer (and by vkEndCommandBuffer further
// down) is not inspected; only the submit and the fence wait are checked.
1231 VkCommandBufferBeginInfo begin_info{};
1232 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
1233 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
1234 vkBeginCommandBuffer(compute_command_buffer, &begin_info);
1235
1236 // Reset timestamp queries for this command buffer
// Queries 0 and 1 of timestamp_query_pool are reset here and written before/after the dispatch.
1237 vkCmdResetQueryPool(compute_command_buffer, timestamp_query_pool, 0, 2);
1238
1239 // Bind pipeline
1240 vkCmdBindPipeline(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_diffuse);
1241
1242 // Bind descriptor sets (geometry, materials, results, sky, debug)
1243 VkDescriptorSet sets[] = {set_geometry, set_materials, set_results, set_sky, set_debug};
1244 vkCmdBindDescriptorSets(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 5, sets, 0, nullptr);
1245
1246 // Write timestamp before dispatch (measures GPU start time)
1247 vkCmdWriteTimestamp(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, timestamp_query_pool, 0);
1248
1249 // Single dispatch — all primitives via 2D tiling, all bands handled inside shader
1250 vkCmdPushConstants(compute_command_buffer, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), &push_constants);
1251 vkCmdDispatch(compute_command_buffer, dispatch_x, dispatch_y, dispatch_z);
1252
1253 // Write timestamp after dispatch (measures GPU end time)
1254 vkCmdWriteTimestamp(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, timestamp_query_pool, 1);
1255
1256 // Final buffer memory barriers to ensure storage buffer writes are visible
// Entries 0-2 make shader writes available to later shader/transfer reads over the whole buffer.
1257 VkBufferMemoryBarrier buffer_barriers[4];
1258
1259 // Radiation_in buffer barrier
1260 buffer_barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1261 buffer_barriers[0].pNext = nullptr;
1262 buffer_barriers[0].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1263 buffer_barriers[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1264 buffer_barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1265 buffer_barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1266 buffer_barriers[0].buffer = radiation_in_buffer.buffer;
1267 buffer_barriers[0].offset = 0;
1268 buffer_barriers[0].size = VK_WHOLE_SIZE;
1269
1270 // Scatter_top buffer barrier
1271 buffer_barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1272 buffer_barriers[1].pNext = nullptr;
1273 buffer_barriers[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1274 buffer_barriers[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1275 buffer_barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1276 buffer_barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1277 buffer_barriers[1].buffer = scatter_top_buffer.buffer;
1278 buffer_barriers[1].offset = 0;
1279 buffer_barriers[1].size = VK_WHOLE_SIZE;
1280
1281 // Scatter_bottom buffer barrier
1282 buffer_barriers[2].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1283 buffer_barriers[2].pNext = nullptr;
1284 buffer_barriers[2].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1285 buffer_barriers[2].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1286 buffer_barriers[2].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1287 buffer_barriers[2].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1288 buffer_barriers[2].buffer = scatter_bottom_buffer.buffer;
1289 buffer_barriers[2].offset = 0;
1290 buffer_barriers[2].size = VK_WHOLE_SIZE;
1291
1292 // Radiation_out_top buffer barrier (read-only for diffuse rays)
// NOTE(review): this entry goes from SHADER_READ to SHADER_READ, and radiation_out_bottom_buffer
// has no matching entry - confirm whether the barrier is needed or the bottom buffer was missed.
1293 buffer_barriers[3].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1294 buffer_barriers[3].pNext = nullptr;
1295 buffer_barriers[3].srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
1296 buffer_barriers[3].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
1297 buffer_barriers[3].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1298 buffer_barriers[3].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1299 buffer_barriers[3].buffer = radiation_out_top_buffer.buffer;
1300 buffer_barriers[3].offset = 0;
1301 buffer_barriers[3].size = VK_WHOLE_SIZE;
1302
1303 vkCmdPipelineBarrier(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, // No global memory barriers
1304 4, buffer_barriers, // Buffer-specific barriers
1305 0, nullptr); // No image barriers
1306
1307 vkEndCommandBuffer(compute_command_buffer);
1308
1309 // Submit command buffer with COMPUTE fence
1310 VkSubmitInfo submit_info{};
1311 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1312 submit_info.commandBufferCount = 1;
1313 submit_info.pCommandBuffers = &compute_command_buffer;
1314
1315 vkResetFences(vk_device, 1, &compute_fence);
1316 VkResult result = vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, compute_fence);
1317 if (result != VK_SUCCESS) {
1318 helios_runtime_error("ERROR (VulkanComputeBackend::launchDiffuseRays): vkQueueSubmit failed. VkResult: " + std::to_string(result));
1319 }
1320
1321 // Wait for compute to complete (no timeout - large scenes can take minutes)
// Each vkWaitForFences call is given 1000000000 ns (one second); VK_TIMEOUT just re-enters the
// loop, so the total wait is unbounded. Any result other than success/timeout is reported as an error.
1322 VkResult wait_result;
1323 do {
1324 wait_result = vkWaitForFences(vk_device, 1, &compute_fence, VK_TRUE, 1000000000ULL);
1325 if (wait_result != VK_SUCCESS && wait_result != VK_TIMEOUT) {
1326 helios_runtime_error("ERROR (VulkanComputeBackend::launchDiffuseRays): vkWaitForFences failed. VkResult: " + std::to_string(wait_result));
1327 }
1328 } while (wait_result == VK_TIMEOUT);
1329 }
1330 }
1331
// Body of VulkanComputeBackend::launchCameraRays (name taken from the error messages below; the
// signature line is not present in this listing).
// Runs one compute dispatch of pipeline_camera for the camera tile described by `params` and blocks
// until the GPU has finished. In order: rebuild launch_to_global_band, (re)allocate the full-resolution
// camera_radiation_buffer, zero it for the first tile only, refresh descriptor sets if dirty,
// record + submit the command buffer, then poll compute_fence.
// Raises via helios_runtime_error when vkQueueSubmit or vkWaitForFences reports a failure.
1333 if (primitive_count == 0) {
1334 return; // No geometry
1335 }
1336
1337 // Build band mapping (same logic as direct/diffuse)
// Collect the global indices of every band whose launch flag is set.
// NOTE(review): band_launch_flag is indexed with g < band_count without checking its size() - confirm
// callers always size it to band_count.
1338 launch_to_global_band.clear();
1339 for (uint32_t g = 0; g < band_count; g++) {
1340 if (!params.band_launch_flag.empty() && params.band_launch_flag[g]) {
1341 launch_to_global_band.push_back(g);
1342 }
1343 }
// Fallback when no flag selected a band: identity mapping over the first launch_band_count bands.
1344 if (launch_to_global_band.empty()) {
1345 for (uint32_t g = 0; g < launch_band_count; g++) {
1346 launch_to_global_band.push_back(g);
1347 }
1348 }
1349
1350 // Ensure camera_radiation_buffer exists at FULL resolution
// One float per (pixel, launched band); the buffer is recreated whenever the required byte size changes.
1351 size_t total_pixels = size_t(params.camera_resolution_full.x) * size_t(params.camera_resolution_full.y);
1352 size_t radiation_size = total_pixels * launch_band_count * sizeof(float);
1353 if (camera_radiation_buffer.buffer == VK_NULL_HANDLE || camera_radiation_buffer.size != radiation_size) {
1354 if (camera_radiation_buffer.buffer != VK_NULL_HANDLE) {
1355 destroyBuffer(camera_radiation_buffer);
1356 }
1357 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1358 camera_radiation_buffer = createBuffer(radiation_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
// A new VkBuffer handle means the descriptor sets must be rewritten before the next dispatch.
1359 descriptors_dirty = true;
1360 }
1361
1362 // Zero camera radiation buffer before tile loop (matches OptiX line 713)
1363 // CRITICAL: Only zero if this is the first tile (pixel_offset == 0,0)
1364 // Subsequent tiles accumulate into the same buffer
1365 if (params.camera_pixel_offset.x == 0 && params.camera_pixel_offset.y == 0) {
1366 zeroBuffer(camera_radiation_buffer);
1367 }
1368
1369 // Update descriptor sets if buffers changed
1370 if (descriptors_dirty) {
1371 updateDescriptorSets();
1372 descriptors_dirty = false;
1373 }
1374
1375 VkDevice vk_device = device->getDevice();
1376
1377 // Record COMPUTE command buffer
// NOTE(review): the VkResult of vkBeginCommandBuffer (and of vkEndCommandBuffer / vkResetFences below)
// is not inspected.
1378 VkCommandBufferBeginInfo begin_info{};
1379 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
1380 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
1381 vkBeginCommandBuffer(compute_command_buffer, &begin_info);
1382
1383 // Bind pipeline
1384 vkCmdBindPipeline(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_camera);
1385
1386 // Bind descriptor sets
// All five sets are bound starting at set index 0, in the order listed here.
1387 VkDescriptorSet sets[] = {set_geometry, set_materials, set_results, set_sky, set_debug};
1388 vkCmdBindDescriptorSets(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 5, sets, 0, nullptr);
1389
1390 // Build push constants
// Host-side mirror of the push-constant block; the per-field byte counts below sum to the stated 128.
// NOTE(review): field order presumably has to match the shader's push-constant declaration - confirm
// against the shader source before reordering or inserting members.
1391 struct PushConstants {
1392 helios::vec3 camera_position; // 12 bytes
1393 float viewplane_length; // 4 bytes
1394 float camera_direction_x; // 4 bytes
1395 float camera_direction_y; // 4 bytes
1396 float focal_length; // 4 bytes
1397 float lens_diameter; // 4 bytes
1398 float fov_aspect_ratio; // 4 bytes
1399 uint32_t resolution_x; // 4 bytes
1400 uint32_t resolution_y; // 4 bytes
1401 uint32_t resolution_full_x; // 4 bytes
1402 uint32_t resolution_full_y; // 4 bytes
1403 uint32_t pixel_offset_x; // 4 bytes
1404 uint32_t pixel_offset_y; // 4 bytes
1405 uint32_t antialiasing_samples; // 4 bytes
1406 uint32_t random_seed; // 4 bytes
1407 uint32_t band_count; // 4 bytes
1408 uint32_t source_count; // 4 bytes
1409 uint32_t primitive_count; // 4 bytes
1410 helios::vec3 sun_direction; // 12 bytes
1411 float solar_disk_cos_angle; // 4 bytes
1412 uint32_t periodic_flag_x; // 4 bytes
1413 uint32_t periodic_flag_y; // 4 bytes
1414 uint32_t bbox_count; // 4 bytes
1415 float domain_xmin; // 4 bytes
1416 float domain_xmax; // 4 bytes
1417 float domain_ymin; // 4 bytes
1418 float domain_ymax; // 4 bytes
1419 uint32_t specular_reflection_enabled; // 4 bytes
1420 } push_constants{}; // Total: 128 bytes
1421
// Camera/tile values come from `params`; scene-wide values come from backend state.
1422 push_constants.camera_position = params.camera_position;
1423 push_constants.viewplane_length = params.camera_viewplane_length;
1424 push_constants.camera_direction_x = params.camera_direction.x;
1425 push_constants.camera_direction_y = params.camera_direction.y;
1426 push_constants.focal_length = params.camera_focal_length;
1427 push_constants.lens_diameter = params.camera_lens_diameter;
1428 push_constants.fov_aspect_ratio = params.camera_fov_aspect;
1429 push_constants.resolution_x = params.camera_resolution.x;
1430 push_constants.resolution_y = params.camera_resolution.y;
1431 push_constants.resolution_full_x = params.camera_resolution_full.x;
1432 push_constants.resolution_full_y = params.camera_resolution_full.y;
1433 push_constants.pixel_offset_x = params.camera_pixel_offset.x;
1434 push_constants.pixel_offset_y = params.camera_pixel_offset.y;
1435 push_constants.antialiasing_samples = params.antialiasing_samples;
1436 push_constants.random_seed = params.random_seed;
// band_count carries the number of bands in this launch (launch_band_count), not the global band_count.
1437 push_constants.band_count = launch_band_count;
1438 push_constants.source_count = source_count;
1439 push_constants.primitive_count = primitive_count;
1440 push_constants.sun_direction = cached_sun_direction;
1441 push_constants.solar_disk_cos_angle = cached_solar_disk_cos_angle;
1442 push_constants.periodic_flag_x = static_cast<uint32_t>(periodic_flag_x);
1443 push_constants.periodic_flag_y = static_cast<uint32_t>(periodic_flag_y);
1444 push_constants.bbox_count = bbox_count;
1445 push_constants.domain_xmin = domain_bounds[0];
1446 push_constants.domain_xmax = domain_bounds[1];
1447 push_constants.domain_ymin = domain_bounds[2];
1448 push_constants.domain_ymax = domain_bounds[3];
1449 push_constants.specular_reflection_enabled = params.specular_reflection_enabled;
1450
1451 vkCmdPushConstants(compute_command_buffer, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), &push_constants);
1452
1453 // Compute dispatch dimensions (workgroup size 16x16x1)
// X/Y group counts are the tile resolution divided by 16, rounded up; Z is one group per antialiasing sample.
1454 const uint32_t WG_X = 16;
1455 const uint32_t WG_Y = 16;
1456 uint32_t dispatch_x = (params.camera_resolution.x + WG_X - 1) / WG_X;
1457 uint32_t dispatch_y = (params.camera_resolution.y + WG_Y - 1) / WG_Y;
1458 uint32_t dispatch_z = params.antialiasing_samples;
1459
1460 vkCmdDispatch(compute_command_buffer, dispatch_x, dispatch_y, dispatch_z);
1461
1462 // Buffer memory barrier for camera_radiation_buffer
// Orders the shader writes of this dispatch before later shader reads and transfer reads of the whole buffer.
1463 VkBufferMemoryBarrier barrier{};
1464 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1465 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1466 barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1467 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1468 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1469 barrier.buffer = camera_radiation_buffer.buffer;
1470 barrier.offset = 0;
1471 barrier.size = VK_WHOLE_SIZE;
1472
1473 vkCmdPipelineBarrier(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
1474
1475 vkEndCommandBuffer(compute_command_buffer);
1476
1477 // Submit command buffer
1478 VkSubmitInfo submit_info{};
1479 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1480 submit_info.commandBufferCount = 1;
1481 submit_info.pCommandBuffers = &compute_command_buffer;
1482
// The fence is reset right before the submit that will signal it.
1483 vkResetFences(vk_device, 1, &compute_fence);
1484 VkResult result = vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, compute_fence);
1485 if (result != VK_SUCCESS) {
1486 helios_runtime_error("ERROR (VulkanComputeBackend::launchCameraRays): vkQueueSubmit failed. VkResult: " + std::to_string(result));
1487 }
1488
1489 // Wait for compute to complete (no timeout - large scenes can take minutes)
// Each vkWaitForFences call uses a timeout of 1000000000 (nanoseconds per the Vulkan API, i.e. one
// second); VK_TIMEOUT simply re-enters the loop, so the overall wait is unbounded. Any result other
// than VK_SUCCESS or VK_TIMEOUT is reported as an error.
1490 VkResult wait_result;
1491 do {
1492 wait_result = vkWaitForFences(vk_device, 1, &compute_fence, VK_TRUE, 1000000000ULL);
1493 if (wait_result != VK_SUCCESS && wait_result != VK_TIMEOUT) {
1494 helios_runtime_error("ERROR (VulkanComputeBackend::launchCameraRays): vkWaitForFences failed. VkResult: " + std::to_string(wait_result));
1495 }
1496 } while (wait_result == VK_TIMEOUT);
1497 }
1498
// Body of VulkanComputeBackend::launchPixelLabelRays (name taken from the error messages below; the
// signature line is not present in this listing).
// Runs one compute dispatch of pipeline_pixel_label for the camera tile described by `params` and
// blocks until the GPU has finished. Unlike the camera radiation launch visible earlier in this file,
// this body allocates no buffers: it only refreshes descriptors, records, submits and waits.
// Raises via helios_runtime_error when vkQueueSubmit or vkWaitForFences reports a failure.
1500 if (primitive_count == 0) {
1501 return; // No geometry
1502 }
1503
1504 // Update descriptor sets if buffers changed
1505 if (descriptors_dirty) {
1506 updateDescriptorSets();
1507 descriptors_dirty = false;
1508 }
1509
1510 VkDevice vk_device = device->getDevice();
1511
1512 // Record COMPUTE command buffer
// NOTE(review): the VkResult of vkBeginCommandBuffer (and of vkEndCommandBuffer / vkResetFences below)
// is not inspected.
1513 VkCommandBufferBeginInfo begin_info{};
1514 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
1515 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
1516 vkBeginCommandBuffer(compute_command_buffer, &begin_info);
1517
1518 // Bind pipeline
1519 vkCmdBindPipeline(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_pixel_label);
1520
1521 // Bind descriptor sets
// All five sets are bound starting at set index 0, in the order listed here.
1522 VkDescriptorSet sets[] = {set_geometry, set_materials, set_results, set_sky, set_debug};
1523 vkCmdBindDescriptorSets(compute_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 5, sets, 0, nullptr);
1524
1525 // Build push constants (same layout as camera_raygen)
// Same member sequence as the camera launch's struct, except that the final 4-byte slot is an explicit
// `padding` word here (written as 0 below) instead of specular_reflection_enabled.
1526 struct PushConstants {
1527 helios::vec3 camera_position;
1528 float viewplane_length;
1529 float camera_direction_x;
1530 float camera_direction_y;
1531 float focal_length;
1532 float lens_diameter;
1533 float fov_aspect_ratio;
1534 uint32_t resolution_x;
1535 uint32_t resolution_y;
1536 uint32_t resolution_full_x;
1537 uint32_t resolution_full_y;
1538 uint32_t pixel_offset_x;
1539 uint32_t pixel_offset_y;
1540 uint32_t antialiasing_samples;
1541 uint32_t random_seed;
1542 uint32_t band_count;
1543 uint32_t source_count;
1544 uint32_t primitive_count;
1545 helios::vec3 sun_direction;
1546 float solar_disk_cos_angle;
1547 uint32_t periodic_flag_x;
1548 uint32_t periodic_flag_y;
1549 uint32_t bbox_count;
1550 float domain_xmin;
1551 float domain_xmax;
1552 float domain_ymin;
1553 float domain_ymax;
1554 uint32_t padding;
1555 } push_constants{};
1556
// Camera/tile values come from `params`; scene-wide values come from backend state.
1557 push_constants.camera_position = params.camera_position;
1558 push_constants.viewplane_length = params.camera_viewplane_length;
1559 push_constants.camera_direction_x = params.camera_direction.x;
1560 push_constants.camera_direction_y = params.camera_direction.y;
1561 push_constants.focal_length = params.camera_focal_length;
1562 push_constants.lens_diameter = params.camera_lens_diameter;
1563 push_constants.fov_aspect_ratio = params.camera_fov_aspect;
1564 push_constants.resolution_x = params.camera_resolution.x;
1565 push_constants.resolution_y = params.camera_resolution.y;
1566 push_constants.resolution_full_x = params.camera_resolution_full.x;
1567 push_constants.resolution_full_y = params.camera_resolution_full.y;
1568 push_constants.pixel_offset_x = params.camera_pixel_offset.x;
1569 push_constants.pixel_offset_y = params.camera_pixel_offset.y;
1570 push_constants.antialiasing_samples = 1; // No AA for pixel label
1571 push_constants.random_seed = params.random_seed;
1572 push_constants.band_count = launch_band_count;
1573 push_constants.source_count = source_count;
1574 push_constants.primitive_count = primitive_count;
1575 push_constants.sun_direction = cached_sun_direction;
1576 push_constants.solar_disk_cos_angle = cached_solar_disk_cos_angle;
1577 push_constants.periodic_flag_x = static_cast<uint32_t>(periodic_flag_x);
1578 push_constants.periodic_flag_y = static_cast<uint32_t>(periodic_flag_y);
1579 push_constants.bbox_count = bbox_count;
1580 push_constants.domain_xmin = domain_bounds[0];
1581 push_constants.domain_xmax = domain_bounds[1];
1582 push_constants.domain_ymin = domain_bounds[2];
1583 push_constants.domain_ymax = domain_bounds[3];
1584 push_constants.padding = 0;
1585
1586 vkCmdPushConstants(compute_command_buffer, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), &push_constants);
1587
1588 // Compute dispatch dimensions (workgroup size 16x16x1, no AA dimension)
// X/Y group counts are the tile resolution divided by 16, rounded up.
1589 const uint32_t WG_X = 16;
1590 const uint32_t WG_Y = 16;
1591 uint32_t dispatch_x = (params.camera_resolution.x + WG_X - 1) / WG_X;
1592 uint32_t dispatch_y = (params.camera_resolution.y + WG_Y - 1) / WG_Y;
1593 uint32_t dispatch_z = 1; // No antialiasing
1594
1595 vkCmdDispatch(compute_command_buffer, dispatch_x, dispatch_y, dispatch_z);
1596
1597 // Buffer memory barriers for pixel label and depth buffers
// The array is not value-initialized, so every member of both entries is assigned explicitly below.
// NOTE(review): this body does not check that camera_pixel_label_buffer / camera_pixel_depth_buffer
// have been created; presumably the caller allocates them beforehand - confirm.
1598 VkBufferMemoryBarrier barriers[2];
1599
1600 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1601 barriers[0].pNext = nullptr;
1602 barriers[0].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1603 barriers[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1604 barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1605 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1606 barriers[0].buffer = camera_pixel_label_buffer.buffer;
1607 barriers[0].offset = 0;
1608 barriers[0].size = VK_WHOLE_SIZE;
1609
1610 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
1611 barriers[1].pNext = nullptr;
1612 barriers[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
1613 barriers[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
1614 barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1615 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1616 barriers[1].buffer = camera_pixel_depth_buffer.buffer;
1617 barriers[1].offset = 0;
1618 barriers[1].size = VK_WHOLE_SIZE;
1619
1620 vkCmdPipelineBarrier(compute_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 2, barriers, 0, nullptr);
1621
1622 vkEndCommandBuffer(compute_command_buffer);
1623
1624 // Submit command buffer
1625 VkSubmitInfo submit_info{};
1626 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1627 submit_info.commandBufferCount = 1;
1628 submit_info.pCommandBuffers = &compute_command_buffer;
1629
// The fence is reset right before the submit that will signal it.
1630 vkResetFences(vk_device, 1, &compute_fence);
1631 VkResult result = vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, compute_fence);
1632 if (result != VK_SUCCESS) {
1633 helios_runtime_error("ERROR (VulkanComputeBackend::launchPixelLabelRays): vkQueueSubmit failed. VkResult: " + std::to_string(result));
1634 }
1635
1636 // Wait for compute to complete (no timeout - large scenes can take minutes)
// Each vkWaitForFences call uses a timeout of 1000000000 (nanoseconds per the Vulkan API, i.e. one
// second); VK_TIMEOUT simply re-enters the loop, so the overall wait is unbounded.
1637 VkResult wait_result;
1638 do {
1639 wait_result = vkWaitForFences(vk_device, 1, &compute_fence, VK_TRUE, 1000000000ULL);
1640 if (wait_result != VK_SUCCESS && wait_result != VK_TIMEOUT) {
1641 helios_runtime_error("ERROR (VulkanComputeBackend::launchPixelLabelRays): vkWaitForFences failed. VkResult: " + std::to_string(wait_result));
1642 }
1643 } while (wait_result == VK_TIMEOUT);
1644 }
1645
// Body of the radiation results download (the signature line is not present in this listing;
// presumably VulkanComputeBackend::getRadiationResults - confirm against the header).
// Fills `results` with primitive_count * launch_band_count floats for each of radiation_in,
// radiation_out_top, radiation_out_bottom, scatter_buff_top and scatter_buff_bottom by mapping the
// corresponding device buffer and copying it out. A buffer that was never created yields zeros.
// Raises via helios_runtime_error when the first radiation_in value is negative, which this code
// treats as an error code written by the compute shader.
1647 if (primitive_count == 0 || launch_band_count == 0) {
1648 return; // No results to download
1649 }
1650
// Element count (not bytes) shared by all five result arrays.
1651 size_t buffer_size = primitive_count * launch_band_count;
1652
1653 // Resize output vectors
1654 results.radiation_in.resize(buffer_size);
1655 results.radiation_out_top.resize(buffer_size);
1656 results.radiation_out_bottom.resize(buffer_size);
1657 results.num_primitives = primitive_count;
1658 results.num_bands = launch_band_count;
1659
1660 // Download radiation_in buffer
1661 if (radiation_in_buffer.buffer != VK_NULL_HANDLE) {
1662 // WORKAROUND for MoltenVK: Direct map instead of transfer command buffer
1663 // This works around coherency issues with compute shader writes
// This is the only queue-idle wait in this body; the later downloads run after it.
1664 vkQueueWaitIdle(device->getComputeQueue()); // Ensure compute is done
1665
1666 void *mapped;
1667 VkResult result = vmaMapMemory(device->getAllocator(), radiation_in_buffer.allocation, &mapped);
1668 if (result == VK_SUCCESS) {
1669 // Invalidate mapped memory range to ensure coherency with GPU writes
1670 // Use VMA's invalidate which handles nonCoherentAtomSize alignment automatically
1671 vmaInvalidateAllocation(device->getAllocator(), radiation_in_buffer.allocation, 0, VK_WHOLE_SIZE);
1672
1673 std::memcpy(results.radiation_in.data(), mapped, buffer_size * sizeof(float));
1674 vmaUnmapMemory(device->getAllocator(), radiation_in_buffer.allocation);
1675
1676 // Check for shader error codes (negative values)
// Element 0 is the code; elements 1 and 2 are formatted as extra detail for some codes.
// NOTE(review): elements [1] and [2] are read without checking buffer_size >= 3.
// NOTE(review): this check is skipped when the mapping fails and the downloadBufferData fallback is used.
1677 if (results.radiation_in[0] < 0.0f) {
1678 std::string error_msg = "ERROR (VulkanComputeBackend): Compute shader reported error. Code: " + std::to_string(results.radiation_in[0]);
1679 if (results.radiation_in[0] == -999.0f) {
1680 error_msg += " (Invalid subdivisions: NX=" + std::to_string(results.radiation_in[1]) + " NY=" + std::to_string(results.radiation_in[2]) + ")";
1681 } else if (results.radiation_in[0] == -998.0f) {
1682 error_msg += " (Subdivisions too large: NX=" + std::to_string(results.radiation_in[1]) + " NY=" + std::to_string(results.radiation_in[2]) + ")";
1683 } else if (results.radiation_in[0] == -997.0f) {
1684 error_msg += " (Zero rays per dimension)";
1685 } else if (results.radiation_in[0] == -996.0f) {
1686 error_msg += " (Too many rays per dimension: " + std::to_string(results.radiation_in[1]) + ")";
1687 }
1688 helios_runtime_error(error_msg);
1689 }
1690
1691 } else {
// Mapping failed: fall back to downloadBufferData for the same byte count.
1692 downloadBufferData(radiation_in_buffer, results.radiation_in.data(), buffer_size * sizeof(float));
1693 }
1694 } else {
1695 std::fill(results.radiation_in.begin(), results.radiation_in.end(), 0.0f);
1696 }
1697
1698 // Download radiation_out_top buffer
// NOTE(review): unlike radiation_in, a failed vmaMapMemory here (and in the three downloads below) has
// no fallback; the destination vector keeps whatever resize() left in it.
1699 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
1700 void *mapped;
1701 VkResult result = vmaMapMemory(device->getAllocator(), radiation_out_top_buffer.allocation, &mapped);
1702 if (result == VK_SUCCESS) {
1703 vmaInvalidateAllocation(device->getAllocator(), radiation_out_top_buffer.allocation, 0, VK_WHOLE_SIZE);
1704 std::memcpy(results.radiation_out_top.data(), mapped, buffer_size * sizeof(float));
1705 vmaUnmapMemory(device->getAllocator(), radiation_out_top_buffer.allocation);
1706 }
1707 } else {
1708 std::fill(results.radiation_out_top.begin(), results.radiation_out_top.end(), 0.0f);
1709 }
1710
1711 // Download radiation_out_bottom buffer
1712 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
1713 void *mapped;
1714 VkResult result = vmaMapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation, &mapped);
1715 if (result == VK_SUCCESS) {
1716 vmaInvalidateAllocation(device->getAllocator(), radiation_out_bottom_buffer.allocation, 0, VK_WHOLE_SIZE);
1717 std::memcpy(results.radiation_out_bottom.data(), mapped, buffer_size * sizeof(float));
1718 vmaUnmapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation);
1719 }
1720 } else {
1721 std::fill(results.radiation_out_bottom.begin(), results.radiation_out_bottom.end(), 0.0f);
1722 }
1723
1724 // Download scatter_top buffer
1725 results.scatter_buff_top.resize(buffer_size);
1726 if (scatter_top_buffer.buffer != VK_NULL_HANDLE) {
1727 void *mapped;
1728 VkResult result = vmaMapMemory(device->getAllocator(), scatter_top_buffer.allocation, &mapped);
1729 if (result == VK_SUCCESS) {
1730 vmaInvalidateAllocation(device->getAllocator(), scatter_top_buffer.allocation, 0, VK_WHOLE_SIZE);
1731 std::memcpy(results.scatter_buff_top.data(), mapped, buffer_size * sizeof(float));
1732 vmaUnmapMemory(device->getAllocator(), scatter_top_buffer.allocation);
1733 }
1734 } else {
1735 std::fill(results.scatter_buff_top.begin(), results.scatter_buff_top.end(), 0.0f);
1736 }
1737
1738 // Download scatter_bottom buffer
1739 results.scatter_buff_bottom.resize(buffer_size);
1740 if (scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
1741 void *mapped;
1742 VkResult result = vmaMapMemory(device->getAllocator(), scatter_bottom_buffer.allocation, &mapped);
1743 if (result == VK_SUCCESS) {
1744 vmaInvalidateAllocation(device->getAllocator(), scatter_bottom_buffer.allocation, 0, VK_WHOLE_SIZE);
1745 std::memcpy(results.scatter_buff_bottom.data(), mapped, buffer_size * sizeof(float));
1746 vmaUnmapMemory(device->getAllocator(), scatter_bottom_buffer.allocation);
1747 }
1748 } else {
1749 std::fill(results.scatter_buff_bottom.begin(), results.scatter_buff_bottom.end(), 0.0f);
1750 }
1751
1752 // Camera scatter buffers: use regular scatter as proxy for camera-weighted scatter.
1753 // This is exact when camera spectral response is uniform (1.0 across all wavelengths).
1754 // For non-uniform camera responses, Vulkan shaders would need dedicated camera scatter
1755 // buffers with camera-weighted materials (rho_cam, tau_cam), matching OptiX (rayHit.cu:223-236).
1756 results.scatter_buff_top_cam = results.scatter_buff_top;
// NOTE(review): listing line 1757 is absent from this extract; presumably it is the matching
// bottom-side camera scatter assignment - confirm against the original file.
1758 }
1759
1760 void VulkanComputeBackend::getCameraResults(std::vector<float> &pixel_data, std::vector<uint> &pixel_labels, std::vector<float> &pixel_depths, uint camera_id, const helios::int2 &resolution) {
1761 size_t total_pixels = size_t(resolution.x) * size_t(resolution.y);
1762 if (total_pixels == 0) {
1763 return; // No pixels
1764 }
1765
1766 // Download camera_radiation_buffer (pixel_data)
1767 pixel_data.resize(total_pixels * launch_band_count);
1768 if (camera_radiation_buffer.buffer != VK_NULL_HANDLE && !pixel_data.empty()) {
1769 vkQueueWaitIdle(device->getComputeQueue()); // Ensure compute is done
1770
1771 void *mapped;
1772 VkResult result = vmaMapMemory(device->getAllocator(), camera_radiation_buffer.allocation, &mapped);
1773 if (result == VK_SUCCESS) {
1774 vmaInvalidateAllocation(device->getAllocator(), camera_radiation_buffer.allocation, 0, VK_WHOLE_SIZE);
1775 std::memcpy(pixel_data.data(), mapped, pixel_data.size() * sizeof(float));
1776 vmaUnmapMemory(device->getAllocator(), camera_radiation_buffer.allocation);
1777 } else {
1778 downloadBufferData(camera_radiation_buffer, pixel_data.data(), pixel_data.size() * sizeof(float));
1779 }
1780 } else {
1781 std::fill(pixel_data.begin(), pixel_data.end(), 0.0f);
1782 }
1783
1784 // Download camera_pixel_label_buffer (pixel_labels)
1785 pixel_labels.resize(total_pixels);
1786 if (camera_pixel_label_buffer.buffer != VK_NULL_HANDLE && !pixel_labels.empty()) {
1787 void *mapped;
1788 VkResult result = vmaMapMemory(device->getAllocator(), camera_pixel_label_buffer.allocation, &mapped);
1789 if (result == VK_SUCCESS) {
1790 vmaInvalidateAllocation(device->getAllocator(), camera_pixel_label_buffer.allocation, 0, VK_WHOLE_SIZE);
1791 std::memcpy(pixel_labels.data(), mapped, pixel_labels.size() * sizeof(uint));
1792 vmaUnmapMemory(device->getAllocator(), camera_pixel_label_buffer.allocation);
1793 } else {
1794 downloadBufferData(camera_pixel_label_buffer, pixel_labels.data(), pixel_labels.size() * sizeof(uint));
1795 }
1796 } else {
1797 std::fill(pixel_labels.begin(), pixel_labels.end(), 0u);
1798 }
1799
1800 // Download camera_pixel_depth_buffer (pixel_depths)
1801 pixel_depths.resize(total_pixels);
1802 if (camera_pixel_depth_buffer.buffer != VK_NULL_HANDLE && !pixel_depths.empty()) {
1803 void *mapped;
1804 VkResult result = vmaMapMemory(device->getAllocator(), camera_pixel_depth_buffer.allocation, &mapped);
1805 if (result == VK_SUCCESS) {
1806 vmaInvalidateAllocation(device->getAllocator(), camera_pixel_depth_buffer.allocation, 0, VK_WHOLE_SIZE);
1807 std::memcpy(pixel_depths.data(), mapped, pixel_depths.size() * sizeof(float));
1808 vmaUnmapMemory(device->getAllocator(), camera_pixel_depth_buffer.allocation);
1809 } else {
1810 downloadBufferData(camera_pixel_depth_buffer, pixel_depths.data(), pixel_depths.size() * sizeof(float));
1811 }
1812 } else {
1813 std::fill(pixel_depths.begin(), pixel_depths.end(), 0.0f);
1814 }
1815 }
1816
1817 void VulkanComputeBackend::zeroRadiationBuffers(size_t launch_band_count_param) {
1818 if (primitive_count == 0 || band_count == 0) {
1819 return; // No geometry or bands
1820 }
1821
1822 // Store launch band count for this runBand() call
1823 launch_band_count = static_cast<uint32_t>(launch_band_count_param);
1824
1825 // Create or resize band_map buffer [launch_band_count × uint32]
1826 size_t band_map_size = launch_band_count * sizeof(uint32_t);
1827 if (band_map_buffer.buffer == VK_NULL_HANDLE || band_map_buffer.size != band_map_size) {
1828 if (band_map_buffer.buffer != VK_NULL_HANDLE) {
1829 destroyBuffer(band_map_buffer);
1830 }
1831 band_map_buffer = createBuffer(band_map_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
1832 descriptors_dirty = true;
1833 }
1834
1835 size_t buffer_size = primitive_count * launch_band_count;
1836
1837 // Create or resize radiation_in buffer
1838 // CRITICAL: Use AUTO_PREFER_HOST for coherent memory (matches working Vulkan compute examples)
1839 // This ensures HOST_VISIBLE | HOST_COHERENT memory which works reliably on MoltenVK
1840 if (radiation_in_buffer.buffer == VK_NULL_HANDLE || radiation_in_buffer.size != buffer_size * sizeof(float)) {
1841 if (radiation_in_buffer.buffer != VK_NULL_HANDLE) {
1842 destroyBuffer(radiation_in_buffer);
1843 }
1844 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1845 radiation_in_buffer = createBuffer(buffer_size * sizeof(float), usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1846 }
1847
1848 // Create or resize radiation_specular buffer [source * primitive * band]
1849 // Only create if source_count > 0 (specular requires sources)
1850 if (source_count > 0) {
1851 size_t specular_buffer_size = source_count * primitive_count * launch_band_count;
1852 if (radiation_specular_buffer.buffer == VK_NULL_HANDLE || radiation_specular_buffer.size != specular_buffer_size * sizeof(float)) {
1853 if (radiation_specular_buffer.buffer != VK_NULL_HANDLE) {
1854 destroyBuffer(radiation_specular_buffer);
1855 }
1856 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1857 radiation_specular_buffer = createBuffer(specular_buffer_size * sizeof(float), usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1858 }
1859 zeroBuffer(radiation_specular_buffer);
1860 }
1861
1862 // Zero radiation buffers
1863 zeroBuffer(radiation_in_buffer);
1864
1865 descriptors_dirty = true; // Result buffers created/changed
1866 }
1867
// Body of the scatter-buffer reset (the signature line is not present in this listing; presumably
// VulkanComputeBackend::zeroScatterBuffers, which a later comment in this file names - confirm).
// Ensures scatter_top_buffer and scatter_bottom_buffer each hold primitive_count * launch_band_count
// floats, recreating them when the byte size differs, then zeroes both and marks descriptors dirty.
1869 if (primitive_count == 0 || launch_band_count == 0) {
1870 return; // No geometry or bands
1871 }
1872
// Element count; multiplied by sizeof(float) wherever a byte size is needed.
1873 size_t buffer_size = primitive_count * launch_band_count;
1874
1875 // Create or resize scatter_top buffer
1876 if (scatter_top_buffer.buffer == VK_NULL_HANDLE || scatter_top_buffer.size != buffer_size * sizeof(float)) {
1877 if (scatter_top_buffer.buffer != VK_NULL_HANDLE) {
1878 destroyBuffer(scatter_top_buffer);
1879 }
1880 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1881 scatter_top_buffer = createBuffer(buffer_size * sizeof(float), usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1882 }
1883
1884 // Create or resize scatter_bottom buffer
1885 if (scatter_bottom_buffer.buffer == VK_NULL_HANDLE || scatter_bottom_buffer.size != buffer_size * sizeof(float)) {
1886 if (scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
1887 destroyBuffer(scatter_bottom_buffer);
1888 }
1889 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1890 scatter_bottom_buffer = createBuffer(buffer_size * sizeof(float), usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1891 }
1892
1893 // Zero both buffers
1894 zeroBuffer(scatter_top_buffer);
1895 zeroBuffer(scatter_bottom_buffer);
1896
// Set unconditionally, i.e. even when neither buffer had to be recreated.
1897 descriptors_dirty = true; // Scatter buffers created/changed
1898 }
1899
// Body of the camera pixel-buffer reset (the signature line is not present in this listing; the
// function name and the declaration of `resolution` are therefore not visible here).
// Ensures camera_pixel_label_buffer (one uint32_t per pixel) and camera_pixel_depth_buffer (one float
// per pixel) exist at resolution.x * resolution.y pixels, recreating them when the byte size differs,
// then zeroes both. Descriptors are marked dirty only when a buffer was recreated.
1901 size_t total_pixels = size_t(resolution.x) * size_t(resolution.y);
1902 if (total_pixels == 0) {
1903 return; // No pixels
1904 }
1905
1906 // Create or resize camera_pixel_label_buffer
1907 VkDeviceSize label_size = total_pixels * sizeof(uint32_t);
1908 if (camera_pixel_label_buffer.buffer == VK_NULL_HANDLE || camera_pixel_label_buffer.size != label_size) {
1909 if (camera_pixel_label_buffer.buffer != VK_NULL_HANDLE) {
1910 destroyBuffer(camera_pixel_label_buffer);
1911 }
1912 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1913 camera_pixel_label_buffer = createBuffer(label_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1914 descriptors_dirty = true;
1915 }
1916
1917 // Create or resize camera_pixel_depth_buffer
1918 VkDeviceSize depth_size = total_pixels * sizeof(float);
1919 if (camera_pixel_depth_buffer.buffer == VK_NULL_HANDLE || camera_pixel_depth_buffer.size != depth_size) {
1920 if (camera_pixel_depth_buffer.buffer != VK_NULL_HANDLE) {
1921 destroyBuffer(camera_pixel_depth_buffer);
1922 }
1923 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1924 camera_pixel_depth_buffer = createBuffer(depth_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1925 descriptors_dirty = true;
1926 }
1927
1928 // Zero both buffers
1929 zeroBuffer(camera_pixel_label_buffer);
1930 zeroBuffer(camera_pixel_depth_buffer);
1931 }
1932
// Body of the scatter -> radiation_out copy (the signature line is not present in this listing;
// presumably VulkanComputeBackend::copyScatterToRadiation, which a later comment in this file
// names - confirm).
// Ensures radiation_out_top_buffer / radiation_out_bottom_buffer exist at
// primitive_count * launch_band_count floats, waits for the compute queue to go idle, then copies
// scatter_top -> radiation_out_top and scatter_bottom -> radiation_out_bottom on the host through
// mapped memory, flushing each destination afterwards.
1934 if (primitive_count == 0 || launch_band_count == 0) {
1935 return; // No geometry or bands
1936 }
1937
1938 // Use launch_band_count (the count of bands in the current runBand dispatch),
1939 // not the global band_count. The scatter and radiation_out buffers are sized
1940 // to launch_band_count everywhere else (zeroScatterBuffers, zeroRadiationBuffers,
1941 // launchDirectRays, launchDiffuseRays). Using band_count here would compute a
1942 // larger size than the source buffer and crash via OOB read in memcpy whenever
1943 // the launch is a strict subset of all bands (e.g. SIF emission dispatch after
1944 // a recursive excitation-band dispatch raises the global band_count).
// Note: unlike the element counts used in neighbouring functions, buffer_size here is in BYTES.
1945 size_t buffer_size = primitive_count * launch_band_count * sizeof(float);
1946
1947 // Create radiation_out_top/bottom buffers if they don't exist
1948 if (radiation_out_top_buffer.buffer == VK_NULL_HANDLE || radiation_out_top_buffer.size != buffer_size) {
1949 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
1950 destroyBuffer(radiation_out_top_buffer);
1951 }
1952 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1953 radiation_out_top_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1954 descriptors_dirty = true;
1955 }
1956
1957 if (radiation_out_bottom_buffer.buffer == VK_NULL_HANDLE || radiation_out_bottom_buffer.size != buffer_size) {
1958 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
1959 destroyBuffer(radiation_out_bottom_buffer);
1960 }
1961 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1962 radiation_out_bottom_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
1963 descriptors_dirty = true;
1964 }
1965
1966 // Wait for any pending compute work and invalidate scatter buffers before reading
1967 vkQueueWaitIdle(device->getComputeQueue());
1968
1969 // Copy scatter_top → radiation_out_top
// NOTE(review): the scatter buffers are not checked for VK_NULL_HANDLE or for size == buffer_size, and
// the vmaMapMemory results are ignored; if a map fails, memcpy runs on the nullptr initial values.
// Presumably the scatter buffers are always allocated before this runs - confirm.
1970 vmaInvalidateAllocation(device->getAllocator(), scatter_top_buffer.allocation, 0, VK_WHOLE_SIZE);
1971 void *src_top = nullptr;
1972 void *dst_top = nullptr;
1973 vmaMapMemory(device->getAllocator(), scatter_top_buffer.allocation, &src_top);
1974 vmaMapMemory(device->getAllocator(), radiation_out_top_buffer.allocation, &dst_top);
1975 memcpy(dst_top, src_top, buffer_size);
// Flush the destination after the host write and before unmapping.
1976 vmaFlushAllocation(device->getAllocator(), radiation_out_top_buffer.allocation, 0, VK_WHOLE_SIZE);
1977 vmaUnmapMemory(device->getAllocator(), scatter_top_buffer.allocation);
1978 vmaUnmapMemory(device->getAllocator(), radiation_out_top_buffer.allocation);
1979
1980 // Copy scatter_bottom → radiation_out_bottom
// Same sequence as the top-side copy, with the same unchecked map results.
1981 vmaInvalidateAllocation(device->getAllocator(), scatter_bottom_buffer.allocation, 0, VK_WHOLE_SIZE);
1982 void *src_bottom = nullptr;
1983 void *dst_bottom = nullptr;
1984 vmaMapMemory(device->getAllocator(), scatter_bottom_buffer.allocation, &src_bottom);
1985 vmaMapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation, &dst_bottom);
1986 memcpy(dst_bottom, src_bottom, buffer_size);
1987 vmaFlushAllocation(device->getAllocator(), radiation_out_bottom_buffer.allocation, 0, VK_WHOLE_SIZE);
1988 vmaUnmapMemory(device->getAllocator(), scatter_bottom_buffer.allocation);
1989 vmaUnmapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation);
1990 }
1991
1992 void VulkanComputeBackend::uploadRadiationOut(const std::vector<float> &radiation_out_top, const std::vector<float> &radiation_out_bottom) {
1993 if (radiation_out_top.empty() || radiation_out_bottom.empty()) {
1994 return; // No data to upload
1995 }
1996
1997 size_t buffer_size = radiation_out_top.size() * sizeof(float);
1998
1999 // Create radiation_out_top buffer if needed
2000 if (radiation_out_top_buffer.buffer == VK_NULL_HANDLE || radiation_out_top_buffer.size != buffer_size) {
2001 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
2002 destroyBuffer(radiation_out_top_buffer);
2003 }
2004 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2005 radiation_out_top_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2006 descriptors_dirty = true;
2007 }
2008
2009 // Create radiation_out_bottom buffer if needed
2010 if (radiation_out_bottom_buffer.buffer == VK_NULL_HANDLE || radiation_out_bottom_buffer.size != buffer_size) {
2011 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
2012 destroyBuffer(radiation_out_bottom_buffer);
2013 }
2014 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2015 radiation_out_bottom_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2016 descriptors_dirty = true;
2017 }
2018
2019 // Upload data using direct mapping (HOST_VISIBLE buffers, matching copyScatterToRadiation approach)
2020 void *dst_top = nullptr;
2021 void *dst_bottom = nullptr;
2022 VkResult result_top = vmaMapMemory(device->getAllocator(), radiation_out_top_buffer.allocation, &dst_top);
2023 VkResult result_bottom = vmaMapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation, &dst_bottom);
2024
2025 if (result_top == VK_SUCCESS) {
2026 std::memcpy(dst_top, radiation_out_top.data(), buffer_size);
2027 vmaFlushAllocation(device->getAllocator(), radiation_out_top_buffer.allocation, 0, VK_WHOLE_SIZE);
2028 vmaUnmapMemory(device->getAllocator(), radiation_out_top_buffer.allocation);
2029 }
2030 if (result_bottom == VK_SUCCESS) {
2031 std::memcpy(dst_bottom, radiation_out_bottom.data(), buffer_size);
2032 vmaFlushAllocation(device->getAllocator(), radiation_out_bottom_buffer.allocation, 0, VK_WHOLE_SIZE);
2033 vmaUnmapMemory(device->getAllocator(), radiation_out_bottom_buffer.allocation);
2034 }
2035 if (result_top != VK_SUCCESS || result_bottom != VK_SUCCESS) {
2036 helios_runtime_error("ERROR (VulkanComputeBackend::uploadRadiationOut): Failed to map radiation output buffers.");
2037 }
2038 }
2039
2040 void VulkanComputeBackend::uploadCameraScatterBuffers(const std::vector<float> &scatter_top_cam, const std::vector<float> &scatter_bottom_cam) {
2041 if (scatter_top_cam.empty() || scatter_bottom_cam.empty()) {
2042 return; // No data to upload
2043 }
2044
2045 size_t buffer_size = scatter_top_cam.size() * sizeof(float);
2046
2047 // Create or resize camera_scatter_top_buffer
2048 if (camera_scatter_top_buffer.buffer == VK_NULL_HANDLE || camera_scatter_top_buffer.size != buffer_size) {
2049 if (camera_scatter_top_buffer.buffer != VK_NULL_HANDLE) {
2050 destroyBuffer(camera_scatter_top_buffer);
2051 }
2052 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2053 camera_scatter_top_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2054 descriptors_dirty = true;
2055 }
2056
2057 // Create or resize camera_scatter_bottom_buffer
2058 if (camera_scatter_bottom_buffer.buffer == VK_NULL_HANDLE || camera_scatter_bottom_buffer.size != buffer_size) {
2059 if (camera_scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
2060 destroyBuffer(camera_scatter_bottom_buffer);
2061 }
2062 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2063 camera_scatter_bottom_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2064 descriptors_dirty = true;
2065 }
2066
2067 // Upload data to both buffers
2068 uploadBufferData(camera_scatter_top_buffer, scatter_top_cam.data(), buffer_size);
2069 uploadBufferData(camera_scatter_bottom_buffer, scatter_bottom_cam.data(), buffer_size);
2070 }
2071
2072 void VulkanComputeBackend::zeroCameraScatterBuffers(size_t launch_band_count_param) {
2073 if (primitive_count == 0 || launch_band_count_param == 0) {
2074 return; // No geometry or bands
2075 }
2076
2077 size_t buffer_size = primitive_count * launch_band_count_param * sizeof(float);
2078
2079 // Create or resize camera_scatter_top_buffer
2080 if (camera_scatter_top_buffer.buffer == VK_NULL_HANDLE || camera_scatter_top_buffer.size != buffer_size) {
2081 if (camera_scatter_top_buffer.buffer != VK_NULL_HANDLE) {
2082 destroyBuffer(camera_scatter_top_buffer);
2083 }
2084 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2085 camera_scatter_top_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2086 descriptors_dirty = true;
2087 }
2088
2089 // Create or resize camera_scatter_bottom_buffer
2090 if (camera_scatter_bottom_buffer.buffer == VK_NULL_HANDLE || camera_scatter_bottom_buffer.size != buffer_size) {
2091 if (camera_scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
2092 destroyBuffer(camera_scatter_bottom_buffer);
2093 }
2094 VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2095 camera_scatter_bottom_buffer = createBuffer(buffer_size, usage, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2096 descriptors_dirty = true;
2097 }
2098
2099 // Zero both buffers
2100 zeroBuffer(camera_scatter_top_buffer);
2101 zeroBuffer(camera_scatter_bottom_buffer);
2102 }
2103
2104 void VulkanComputeBackend::uploadSourceFluxes(const std::vector<float> &fluxes) {
2105 if (fluxes.empty() || source_count == 0) {
2106 return; // No fluxes or sources
2107 }
2108
2109 // Fluxes are indexed by [source * Nbands_launch + band]
2110 // We expect Nsources * Nbands_launch entries
2111 size_t expected_size = source_count * launch_band_count;
2112
2113 if (fluxes.size() != expected_size && fluxes.size() != source_count) {
2114 // Allow single-band upload (size = Nsources) or full upload (size = Nsources * Nbands)
2115 if (fluxes.size() != source_count) {
2116 helios_runtime_error("ERROR (VulkanComputeBackend::uploadSourceFluxes): fluxes size mismatch. Expected " + std::to_string(source_count) + " (single band) or " + std::to_string(expected_size) + " (all bands), got " +
2117 std::to_string(fluxes.size()));
2118 }
2119 }
2120
2121 if (source_fluxes_buffer.buffer != VK_NULL_HANDLE) {
2122 destroyBuffer(source_fluxes_buffer);
2123 }
2124 source_fluxes_buffer = createBuffer(fluxes.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2125 uploadBufferData(source_fluxes_buffer, fluxes.data(), fluxes.size() * sizeof(float));
2126
2127 descriptors_dirty = true; // New buffer created, descriptors need update
2128 }
2129
2130 void VulkanComputeBackend::uploadSourceFluxesCam(const std::vector<float> &fluxes_cam) {
2131 if (fluxes_cam.empty() || source_count == 0) {
2132 return; // No camera weights or sources
2133 }
2134
2135 // Camera spectral response weights indexed by [source * Nbands_launch + band]
2136 size_t expected_size = source_count * launch_band_count;
2137
2138 if (fluxes_cam.size() != expected_size) {
2139 helios_runtime_error("ERROR (VulkanComputeBackend::uploadSourceFluxesCam): fluxes_cam size mismatch. Expected " + std::to_string(expected_size) + " (Nsources * Nbands_launch), got " + std::to_string(fluxes_cam.size()));
2140 }
2141
2142 if (source_fluxes_cam_buffer.buffer != VK_NULL_HANDLE) {
2143 destroyBuffer(source_fluxes_cam_buffer);
2144 }
2145 source_fluxes_cam_buffer = createBuffer(fluxes_cam.size() * sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2146 uploadBufferData(source_fluxes_cam_buffer, fluxes_cam.data(), fluxes_cam.size() * sizeof(float));
2147
2148 descriptors_dirty = true; // New buffer uploaded, descriptors need update
2149 }
2150
2152 // Query VMA statistics
2153 VmaTotalStatistics stats;
2154 vmaCalculateStatistics(device->getAllocator(), &stats);
2155
2156 std::cout << "========== Vulkan Memory Usage ==========" << std::endl;
2157 std::cout << "Allocated blocks: " << stats.total.statistics.blockCount << std::endl;
2158 std::cout << "Allocated memory: " << (stats.total.statistics.allocationBytes / 1024.0 / 1024.0) << " MB" << std::endl;
2159 std::cout << "Used memory: " << (stats.total.statistics.blockBytes / 1024.0 / 1024.0) << " MB" << std::endl;
2160
2161 // Query physical device memory properties
2162 VkPhysicalDeviceMemoryProperties mem_props;
2163 vkGetPhysicalDeviceMemoryProperties(device->getPhysicalDevice(), &mem_props);
2164
2165 std::cout << "\nTotal device memory heaps: " << mem_props.memoryHeapCount << std::endl;
2166 for (uint32_t i = 0; i < mem_props.memoryHeapCount; ++i) {
2167 std::cout << " Heap " << i << ": " << (mem_props.memoryHeaps[i].size / 1024.0 / 1024.0 / 1024.0) << " GB";
2168 if (mem_props.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
2169 std::cout << " (device-local)";
2170 }
2171 std::cout << std::endl;
2172 }
2173 std::cout << "==========================================" << std::endl;
2174 }
2175
2176 // ========== Helper methods ==========
2177
2178 VulkanComputeBackend::Buffer VulkanComputeBackend::createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VmaMemoryUsage mem_usage) {
2179 Buffer buffer;
2180 buffer.size = size;
2181
2182 VkBufferCreateInfo buffer_info{};
2183 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
2184 buffer_info.size = size;
2185 buffer_info.usage = usage | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2186 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
2187
2188 VmaAllocationCreateInfo alloc_info{};
2189 alloc_info.usage = mem_usage;
2190
2191 // CRITICAL for MoltenVK: Request host-coherent memory for compute shader result buffers
2192 // This matches working Vulkan compute examples that use HOST_VISIBLE | HOST_COHERENT
2193 if (mem_usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) {
2194 alloc_info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
2195 alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
2196 }
2197
2198 // Optimization: Use dedicated memory for large GPU-only buffers (>64 MB)
2199 if (mem_usage == VMA_MEMORY_USAGE_GPU_ONLY && size > 64 * 1024 * 1024) {
2200 alloc_info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2201 }
2202
2203 // Optimization: Keep staging buffers persistently mapped for faster CPU access
2204 if (mem_usage == VMA_MEMORY_USAGE_CPU_ONLY) {
2205 alloc_info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
2206 }
2207
2208 VkResult result = vmaCreateBuffer(device->getAllocator(), &buffer_info, &alloc_info, &buffer.buffer, &buffer.allocation, nullptr);
2209 if (result != VK_SUCCESS) {
2210 helios_runtime_error("ERROR (VulkanComputeBackend::createBuffer): Failed to create buffer. VkResult: " + std::to_string(result));
2211 }
2212
2213 return buffer;
2214 }
2215
2216 void VulkanComputeBackend::destroyBuffer(Buffer &buffer) {
2217 if (buffer.buffer != VK_NULL_HANDLE) {
2218 vmaDestroyBuffer(device->getAllocator(), buffer.buffer, buffer.allocation);
2219 buffer.buffer = VK_NULL_HANDLE;
2220 buffer.allocation = VK_NULL_HANDLE;
2221 buffer.size = 0;
2222 }
2223 }
2224
2225 void VulkanComputeBackend::uploadBufferData(Buffer &buffer, const void *data, size_t size) {
2226 // Create staging buffer
2227 Buffer staging = createBuffer(size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_MEMORY_USAGE_CPU_ONLY);
2228
2229 // Map and copy
2230 void *mapped;
2231 vmaMapMemory(device->getAllocator(), staging.allocation, &mapped);
2232 std::memcpy(mapped, data, size);
2233 vmaUnmapMemory(device->getAllocator(), staging.allocation);
2234
2235 // Copy staging → device buffer
2236 VkCommandBufferBeginInfo begin_info{};
2237 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
2238 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
2239 vkBeginCommandBuffer(transfer_command_buffer, &begin_info);
2240
2241 VkBufferCopy copy_region{};
2242 copy_region.size = size;
2243 vkCmdCopyBuffer(transfer_command_buffer, staging.buffer, buffer.buffer, 1, &copy_region);
2244
2245 // Add memory barrier to ensure transfer completes before compute shader access
2246 VkBufferMemoryBarrier barrier{};
2247 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
2248 barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
2249 barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
2250 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2251 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2252 barrier.buffer = buffer.buffer;
2253 barrier.offset = 0;
2254 barrier.size = VK_WHOLE_SIZE;
2255
2256 vkCmdPipelineBarrier(transfer_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
2257
2258 vkEndCommandBuffer(transfer_command_buffer);
2259
2260 // Submit with fence
2261 VkSubmitInfo submit_info{};
2262 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
2263 submit_info.commandBufferCount = 1;
2264 submit_info.pCommandBuffers = &transfer_command_buffer;
2265
2266 vkResetFences(device->getDevice(), 1, &transfer_fence);
2267 vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, transfer_fence);
2268
2269 // Polling-based timeout (MoltenVK doesn't respect timeout parameter)
2270 const int max_attempts = 50;
2271 const uint64_t poll_interval_ns = 100000000ULL; // 100ms
2272 bool completed = false;
2273
2274 for (int attempt = 0; attempt < max_attempts; ++attempt) {
2275 VkResult result = vkWaitForFences(device->getDevice(), 1, &transfer_fence, VK_TRUE, poll_interval_ns);
2276 if (result == VK_SUCCESS) {
2277 completed = true;
2278 break;
2279 } else if (result != VK_TIMEOUT) {
2280 helios_runtime_error("ERROR (VulkanComputeBackend::uploadBufferData): vkWaitForFences failed. VkResult: " + std::to_string(result));
2281 }
2282 }
2283
2284 if (!completed) {
2285 helios_runtime_error("ERROR (VulkanComputeBackend::uploadBufferData): GPU buffer upload timed out after 5 seconds. Buffer size: " + std::to_string(size));
2286 }
2287
2288 // SAFETY: Staging buffer destroyed only after fence signals (GPU copy complete)
2289 destroyBuffer(staging);
2290 }
2291
2292 void VulkanComputeBackend::downloadBufferData(const Buffer &buffer, void *data, size_t size) {
2293 // CRITICAL: Wait for ALL GPU work to complete before downloading
2294 // This ensures compute shader writes are visible to the transfer operation
2295 vkQueueWaitIdle(device->getComputeQueue());
2296
2297 // Create staging buffer
2298 Buffer staging = createBuffer(size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY);
2299
2300 // Copy device → staging
2301 VkCommandBufferBeginInfo begin_info{};
2302 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
2303 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
2304 vkBeginCommandBuffer(transfer_command_buffer, &begin_info);
2305
2306 // Add memory barrier to ensure compute shader writes complete before transfer
2307 VkBufferMemoryBarrier barrier{};
2308 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
2309 barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
2310 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
2311 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2312 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2313 barrier.buffer = buffer.buffer;
2314 barrier.offset = 0;
2315 barrier.size = VK_WHOLE_SIZE;
2316
2317 vkCmdPipelineBarrier(transfer_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
2318
2319 VkBufferCopy copy_region{};
2320 copy_region.size = size;
2321 vkCmdCopyBuffer(transfer_command_buffer, buffer.buffer, staging.buffer, 1, &copy_region);
2322
2323 vkEndCommandBuffer(transfer_command_buffer);
2324
2325 // Submit with fence
2326 VkSubmitInfo submit_info{};
2327 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
2328 submit_info.commandBufferCount = 1;
2329 submit_info.pCommandBuffers = &transfer_command_buffer;
2330
2331 vkResetFences(device->getDevice(), 1, &transfer_fence);
2332 vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, transfer_fence);
2333
2334 // Polling-based timeout (MoltenVK doesn't respect timeout parameter)
2335 const int max_attempts = 50;
2336 const uint64_t poll_interval_ns = 100000000ULL; // 100ms
2337 bool completed = false;
2338
2339 for (int attempt = 0; attempt < max_attempts; ++attempt) {
2340 VkResult result = vkWaitForFences(device->getDevice(), 1, &transfer_fence, VK_TRUE, poll_interval_ns);
2341 if (result == VK_SUCCESS) {
2342 completed = true;
2343 break;
2344 } else if (result != VK_TIMEOUT) {
2345 helios_runtime_error("ERROR (VulkanComputeBackend::downloadBufferData): vkWaitForFences failed. VkResult: " + std::to_string(result));
2346 }
2347 }
2348
2349 if (!completed) {
2350 helios_runtime_error("ERROR (VulkanComputeBackend::downloadBufferData): GPU buffer download timed out after 5 seconds. Buffer size: " + std::to_string(size));
2351 }
2352
2353 // Map and read
2354 void *mapped;
2355 vmaMapMemory(device->getAllocator(), staging.allocation, &mapped);
2356 std::memcpy(data, mapped, size);
2357 vmaUnmapMemory(device->getAllocator(), staging.allocation);
2358
2359 // Cleanup staging
2360 destroyBuffer(staging);
2361 }
2362
2363 void VulkanComputeBackend::zeroBuffer(Buffer &buffer) {
2364 VkCommandBufferBeginInfo begin_info{};
2365 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
2366 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
2367 vkBeginCommandBuffer(transfer_command_buffer, &begin_info);
2368
2369 vkCmdFillBuffer(transfer_command_buffer, buffer.buffer, 0, buffer.size, 0);
2370
2371 vkEndCommandBuffer(transfer_command_buffer);
2372
2373 // Submit with fence
2374 VkSubmitInfo submit_info{};
2375 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
2376 submit_info.commandBufferCount = 1;
2377 submit_info.pCommandBuffers = &transfer_command_buffer;
2378
2379 vkResetFences(device->getDevice(), 1, &transfer_fence);
2380 vkQueueSubmit(device->getComputeQueue(), 1, &submit_info, transfer_fence);
2381
2382 // Polling-based timeout (MoltenVK doesn't respect timeout parameter)
2383 const int max_attempts = 50;
2384 const uint64_t poll_interval_ns = 100000000ULL; // 100ms
2385 bool completed = false;
2386
2387 for (int attempt = 0; attempt < max_attempts; ++attempt) {
2388 VkResult result = vkWaitForFences(device->getDevice(), 1, &transfer_fence, VK_TRUE, poll_interval_ns);
2389 if (result == VK_SUCCESS) {
2390 completed = true;
2391 break;
2392 } else if (result != VK_TIMEOUT) {
2393 helios_runtime_error("ERROR (VulkanComputeBackend::zeroBuffer): vkWaitForFences failed. VkResult: " + std::to_string(result));
2394 }
2395 }
2396
2397 if (!completed) {
2398 helios_runtime_error("ERROR (VulkanComputeBackend::zeroBuffer): GPU buffer clear timed out after 5 seconds. Buffer size: " + std::to_string(buffer.size));
2399 }
2400 }
2401
2402 void VulkanComputeBackend::createCommandResources() {
2403 VkDevice vk_device = device->getDevice();
2404
2405 VkCommandPoolCreateInfo pool_info{};
2406 pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
2407 pool_info.queueFamilyIndex = device->getComputeQueueFamily();
2408 pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
2409
2410 VkResult result = vkCreateCommandPool(vk_device, &pool_info, nullptr, &command_pool);
2411 if (result != VK_SUCCESS) {
2412 helios_runtime_error("ERROR (VulkanComputeBackend::createCommandResources): Failed to create command pool. VkResult: " + std::to_string(result));
2413 }
2414
2415 // Allocate TWO command buffers: one for transfers, one for compute
2416 VkCommandBufferAllocateInfo alloc_info{};
2417 alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
2418 alloc_info.commandPool = command_pool;
2419 alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
2420 alloc_info.commandBufferCount = 2;
2421
2422 VkCommandBuffer buffers[2];
2423 result = vkAllocateCommandBuffers(vk_device, &alloc_info, buffers);
2424 if (result != VK_SUCCESS) {
2425 helios_runtime_error("ERROR (VulkanComputeBackend::createCommandResources): Failed to allocate command buffers. VkResult: " + std::to_string(result));
2426 }
2427
2428 transfer_command_buffer = buffers[0];
2429 compute_command_buffer = buffers[1];
2430
2431 // Create fences for synchronization
2432 VkFenceCreateInfo fence_info{};
2433 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
2434 fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; // Start signaled so first wait succeeds
2435
2436 result = vkCreateFence(vk_device, &fence_info, nullptr, &transfer_fence);
2437 if (result != VK_SUCCESS) {
2438 helios_runtime_error("ERROR (VulkanComputeBackend::createCommandResources): Failed to create transfer fence. VkResult: " + std::to_string(result));
2439 }
2440
2441 result = vkCreateFence(vk_device, &fence_info, nullptr, &compute_fence);
2442 if (result != VK_SUCCESS) {
2443 helios_runtime_error("ERROR (VulkanComputeBackend::createCommandResources): Failed to create compute fence. VkResult: " + std::to_string(result));
2444 }
2445
2446 // Create timestamp query pool for GPU profiling (2 queries: start and end)
2447 VkQueryPoolCreateInfo query_pool_info{};
2448 query_pool_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
2449 query_pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
2450 query_pool_info.queryCount = 2;
2451
2452 result = vkCreateQueryPool(vk_device, &query_pool_info, nullptr, &timestamp_query_pool);
2453 if (result != VK_SUCCESS) {
2454 helios_runtime_error("ERROR (VulkanComputeBackend::createCommandResources): Failed to create timestamp query pool. VkResult: " + std::to_string(result));
2455 }
2456
2457 // Get timestamp period (nanoseconds per tick) for converting timestamps to time
2458 VkPhysicalDeviceProperties props;
2459 vkGetPhysicalDeviceProperties(device->getPhysicalDevice(), &props);
2460 timestamp_period = props.limits.timestampPeriod;
2461 }
2462
2463 void VulkanComputeBackend::createDescriptorSets() {
2464 VkDevice vk_device = device->getDevice();
2465
2466 // ========== Create Descriptor Set Layouts ==========
2467
2468 // Set 0: Geometry buffers (changes on geometry update)
2469 std::vector<VkDescriptorSetLayoutBinding> geometry_bindings = {
2470 {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // BVH nodes
2471 {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Primitive indices
2472 {2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Transform matrices
2473 {3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Primitive types
2474 {4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Primitive UUIDs
2475 {5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Primitive positions
2476 {6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Object subdivisions
2477 {7, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Twosided flags
2478 {8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Patch vertices
2479 {9, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Triangle vertices
2480 {10, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // World-space normals
2481 {11, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Mask data (uint)
2482 {12, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Mask sizes (ivec2)
2483 {13, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Mask offsets (uint)
2484 {14, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Mask IDs (int)
2485 {15, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // UV data (vec2)
2486 {16, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // UV IDs (int)
2487 {17, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Bbox vertices (periodic boundary)
2488 };
2489
2490 VkDescriptorSetLayoutCreateInfo layout_info{};
2491 layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
2492 layout_info.bindingCount = static_cast<uint32_t>(geometry_bindings.size());
2493 layout_info.pBindings = geometry_bindings.data();
2494
2495 if (vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &set_layout_geometry) != VK_SUCCESS) {
2496 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create geometry descriptor set layout");
2497 }
2498
2499 // Set 1: Material/Source buffers (changes per simulation)
2500 std::vector<VkDescriptorSetLayoutBinding> material_bindings = {
2501 {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source positions
2502 {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source types
2503 {2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source rotations
2504 {3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source widths
2505 {4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source fluxes
2506 {5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Reflectivity
2507 {6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Transmissivity
2508 {7, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Specular exponent
2509 {8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Specular scale
2510 {9, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Source fluxes (camera-weighted)
2511 {10, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // Band map
2512 };
2513
2514 layout_info.bindingCount = static_cast<uint32_t>(material_bindings.size());
2515 layout_info.pBindings = material_bindings.data();
2516
2517 if (vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &set_layout_materials) != VK_SUCCESS) {
2518 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create material descriptor set layout");
2519 }
2520
2521 // Set 2: Result buffers (read/write, zeroed per-launch)
2522 std::vector<VkDescriptorSetLayoutBinding> result_bindings = {
2523 {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // radiation_in
2524 {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // radiation_out_top
2525 {2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // radiation_out_bottom
2526 {3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // scatter_top
2527 {4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // scatter_bottom
2528 {5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_radiation
2529 {6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_pixel_label
2530 {7, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_pixel_depth
2531 {8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_scatter_top
2532 {9, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_scatter_bottom
2533 {10, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // radiation_specular
2534 };
2535
2536 layout_info.bindingCount = static_cast<uint32_t>(result_bindings.size());
2537 layout_info.pBindings = result_bindings.data();
2538
2539 if (vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &set_layout_results) != VK_SUCCESS) {
2540 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create result descriptor set layout");
2541 }
2542
2543 // Set 3: Sky parameters (read-only diffuse parameters)
2544 std::vector<VkDescriptorSetLayoutBinding> sky_bindings = {
2545 {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // diffuse_flux
2546 {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // diffuse_peak_dir
2547 {2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // diffuse_extinction
2548 {3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // diffuse_dist_norm
2549 {4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // sky_radiance_params
2550 {5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // camera_sky_radiance
2551 {6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // solar_disk_radiance
2552 };
2553
2554 layout_info.bindingCount = static_cast<uint32_t>(sky_bindings.size());
2555 layout_info.pBindings = sky_bindings.data();
2556
2557 if (vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &set_layout_sky) != VK_SUCCESS) {
2558 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create sky descriptor set layout");
2559 }
2560
2561 // Set 4: Debug counters (profiling/diagnostics)
2562 std::vector<VkDescriptorSetLayoutBinding> debug_bindings = {
2563 {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, // debug_counters
2564 };
2565
2566 layout_info.bindingCount = static_cast<uint32_t>(debug_bindings.size());
2567 layout_info.pBindings = debug_bindings.data();
2568
2569 if (vkCreateDescriptorSetLayout(vk_device, &layout_info, nullptr, &set_layout_debug) != VK_SUCCESS) {
2570 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create debug descriptor set layout");
2571 }
2572
2573 // ========== Create Descriptor Pool ==========
2574
2575 std::vector<VkDescriptorPoolSize> pool_sizes = {
2576 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 62}, // All sets: 18 geo + 11 mat + 11 result + 7 sky + 1 debug + margin
2577 };
2578
2579 VkDescriptorPoolCreateInfo pool_info{};
2580 pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
2581 pool_info.poolSizeCount = static_cast<uint32_t>(pool_sizes.size());
2582 pool_info.pPoolSizes = pool_sizes.data();
2583 pool_info.maxSets = 5; // geometry, materials, results, sky, debug
2584
2585 if (vkCreateDescriptorPool(vk_device, &pool_info, nullptr, &descriptor_pool) != VK_SUCCESS) {
2586 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to create descriptor pool");
2587 }
2588
2589 // ========== Allocate Descriptor Sets ==========
2590
2591 VkDescriptorSetLayout layouts[] = {set_layout_geometry, set_layout_materials, set_layout_results, set_layout_sky, set_layout_debug};
2592
2593 VkDescriptorSetAllocateInfo alloc_info{};
2594 alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
2595 alloc_info.descriptorPool = descriptor_pool;
2596 alloc_info.descriptorSetCount = 5;
2597 alloc_info.pSetLayouts = layouts;
2598
2599 VkDescriptorSet sets[5];
2600 if (vkAllocateDescriptorSets(vk_device, &alloc_info, sets) != VK_SUCCESS) {
2601 helios_runtime_error("ERROR (VulkanComputeBackend::createDescriptorSets): Failed to allocate descriptor sets");
2602 }
2603
2604 set_geometry = sets[0];
2605 set_materials = sets[1];
2606 set_results = sets[2];
2607 set_sky = sets[3];
2608 set_debug = sets[4];
2609
2610 // ========== Create placeholder sky parameter buffers ==========
2611 // MoltenVK requires all descriptor buffers to exist before pipeline creation
2612 // to determine argument buffer resource base types during shader compilation.
2613 // These will be resized properly when launchDiffuseRays() is first called.
2614
2615 const size_t placeholder_size = sizeof(float); // Minimal 1-element buffer
2616 // All placeholders include TRANSFER_DST for zeroing - VMA may reuse freed memory with stale data
2617
2618 diffuse_flux_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2619 zeroBuffer(diffuse_flux_buffer);
2620 diffuse_peak_dir_buffer = createBuffer(sizeof(helios::vec3), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2621 zeroBuffer(diffuse_peak_dir_buffer);
2622 diffuse_extinction_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2623 zeroBuffer(diffuse_extinction_buffer);
2624 diffuse_dist_norm_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2625 zeroBuffer(diffuse_dist_norm_buffer);
2626 sky_radiance_params_buffer = createBuffer(sizeof(helios::vec4), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2627 zeroBuffer(sky_radiance_params_buffer);
2628 camera_sky_radiance_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2629 zeroBuffer(camera_sky_radiance_buffer);
2630 solar_disk_radiance_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2631 zeroBuffer(solar_disk_radiance_buffer);
2632
2633 // ========== Create placeholder camera result buffers ==========
2634 // MoltenVK requires these before pipeline creation (camera shaders reference them)
2635 camera_radiation_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2636 zeroBuffer(camera_radiation_buffer);
2637 camera_pixel_label_buffer = createBuffer(sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2638 zeroBuffer(camera_pixel_label_buffer);
2639 camera_pixel_depth_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2640 zeroBuffer(camera_pixel_depth_buffer);
2641 camera_scatter_top_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2642 zeroBuffer(camera_scatter_top_buffer);
2643 camera_scatter_bottom_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST);
2644 zeroBuffer(camera_scatter_bottom_buffer);
2645
2646 // ========== Create placeholder specular buffers ==========
2647 // MoltenVK requires these before pipeline creation (camera/direct shaders reference them)
2648 specular_exponent_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2649 zeroBuffer(specular_exponent_buffer);
2650 specular_scale_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2651 zeroBuffer(specular_scale_buffer);
2652 source_fluxes_cam_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2653 zeroBuffer(source_fluxes_cam_buffer);
2654 radiation_specular_buffer = createBuffer(placeholder_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2655 zeroBuffer(radiation_specular_buffer);
2656
2657 // ========== Create placeholder mask/UV buffers ==========
2658 // Same requirement as sky parameters — needed before pipeline creation for MoltenVK
2659 mask_data_buffer = createBuffer(sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2660 mask_sizes_buffer = createBuffer(sizeof(int32_t) * 2, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2661 mask_offsets_buffer = createBuffer(sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2662 mask_IDs_buffer = createBuffer(sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2663 uv_data_buffer = createBuffer(sizeof(float) * 2, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2664 uv_IDs_buffer = createBuffer(sizeof(int32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2665 bbox_vertices_buffer = createBuffer(sizeof(float), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
2666
2667 // Update descriptor set 3 (sky parameters) with placeholder buffers
2668 // Note: Geometry/material/result buffers don't exist yet, so we only update set 3
2669 VkDescriptorBufferInfo diffuse_flux_info{};
2670 diffuse_flux_info.buffer = diffuse_flux_buffer.buffer;
2671 diffuse_flux_info.offset = 0;
2672 diffuse_flux_info.range = VK_WHOLE_SIZE;
2673
2674 VkDescriptorBufferInfo diffuse_peak_dir_info{};
2675 diffuse_peak_dir_info.buffer = diffuse_peak_dir_buffer.buffer;
2676 diffuse_peak_dir_info.offset = 0;
2677 diffuse_peak_dir_info.range = VK_WHOLE_SIZE;
2678
2679 VkDescriptorBufferInfo diffuse_extinction_info{};
2680 diffuse_extinction_info.buffer = diffuse_extinction_buffer.buffer;
2681 diffuse_extinction_info.offset = 0;
2682 diffuse_extinction_info.range = VK_WHOLE_SIZE;
2683
2684 VkDescriptorBufferInfo diffuse_dist_norm_info{};
2685 diffuse_dist_norm_info.buffer = diffuse_dist_norm_buffer.buffer;
2686 diffuse_dist_norm_info.offset = 0;
2687 diffuse_dist_norm_info.range = VK_WHOLE_SIZE;
2688
2689 VkDescriptorBufferInfo sky_radiance_params_info{};
2690 sky_radiance_params_info.buffer = sky_radiance_params_buffer.buffer;
2691 sky_radiance_params_info.offset = 0;
2692 sky_radiance_params_info.range = VK_WHOLE_SIZE;
2693
2694 std::vector<VkWriteDescriptorSet> descriptor_writes;
2695
2696 VkWriteDescriptorSet write{};
2697 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
2698 write.dstSet = set_sky;
2699 write.dstBinding = 0;
2700 write.dstArrayElement = 0;
2701 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2702 write.descriptorCount = 1;
2703 write.pBufferInfo = &diffuse_flux_info;
2704 descriptor_writes.push_back(write);
2705
2706 write.dstBinding = 1;
2707 write.pBufferInfo = &diffuse_peak_dir_info;
2708 descriptor_writes.push_back(write);
2709
2710 write.dstBinding = 2;
2711 write.pBufferInfo = &diffuse_extinction_info;
2712 descriptor_writes.push_back(write);
2713
2714 write.dstBinding = 3;
2715 write.pBufferInfo = &diffuse_dist_norm_info;
2716 descriptor_writes.push_back(write);
2717
2718 write.dstBinding = 4;
2719 write.pBufferInfo = &sky_radiance_params_info;
2720 descriptor_writes.push_back(write);
2721
2722 VkDescriptorBufferInfo camera_sky_radiance_info{};
2723 camera_sky_radiance_info.buffer = camera_sky_radiance_buffer.buffer;
2724 camera_sky_radiance_info.offset = 0;
2725 camera_sky_radiance_info.range = VK_WHOLE_SIZE;
2726
2727 write.dstBinding = 5;
2728 write.pBufferInfo = &camera_sky_radiance_info;
2729 descriptor_writes.push_back(write);
2730
2731 VkDescriptorBufferInfo solar_disk_radiance_info{};
2732 solar_disk_radiance_info.buffer = solar_disk_radiance_buffer.buffer;
2733 solar_disk_radiance_info.offset = 0;
2734 solar_disk_radiance_info.range = VK_WHOLE_SIZE;
2735
2736 write.dstBinding = 6;
2737 write.pBufferInfo = &solar_disk_radiance_info;
2738 descriptor_writes.push_back(write);
2739
2740 // Descriptor writes for camera result placeholder buffers (set_results bindings 5-9)
2741 VkDescriptorBufferInfo camera_radiation_info{};
2742 camera_radiation_info.buffer = camera_radiation_buffer.buffer;
2743 camera_radiation_info.offset = 0;
2744 camera_radiation_info.range = VK_WHOLE_SIZE;
2745
2746 VkDescriptorBufferInfo camera_pixel_label_info{};
2747 camera_pixel_label_info.buffer = camera_pixel_label_buffer.buffer;
2748 camera_pixel_label_info.offset = 0;
2749 camera_pixel_label_info.range = VK_WHOLE_SIZE;
2750
2751 VkDescriptorBufferInfo camera_pixel_depth_info{};
2752 camera_pixel_depth_info.buffer = camera_pixel_depth_buffer.buffer;
2753 camera_pixel_depth_info.offset = 0;
2754 camera_pixel_depth_info.range = VK_WHOLE_SIZE;
2755
2756 VkDescriptorBufferInfo camera_scatter_top_info{};
2757 camera_scatter_top_info.buffer = camera_scatter_top_buffer.buffer;
2758 camera_scatter_top_info.offset = 0;
2759 camera_scatter_top_info.range = VK_WHOLE_SIZE;
2760
2761 VkDescriptorBufferInfo camera_scatter_bottom_info{};
2762 camera_scatter_bottom_info.buffer = camera_scatter_bottom_buffer.buffer;
2763 camera_scatter_bottom_info.offset = 0;
2764 camera_scatter_bottom_info.range = VK_WHOLE_SIZE;
2765
2766 write.dstSet = set_results;
2767 write.dstBinding = 5;
2768 write.pBufferInfo = &camera_radiation_info;
2769 descriptor_writes.push_back(write);
2770
2771 write.dstBinding = 6;
2772 write.pBufferInfo = &camera_pixel_label_info;
2773 descriptor_writes.push_back(write);
2774
2775 write.dstBinding = 7;
2776 write.pBufferInfo = &camera_pixel_depth_info;
2777 descriptor_writes.push_back(write);
2778
2779 write.dstBinding = 8;
2780 write.pBufferInfo = &camera_scatter_top_info;
2781 descriptor_writes.push_back(write);
2782
2783 write.dstBinding = 9;
2784 write.pBufferInfo = &camera_scatter_bottom_info;
2785 descriptor_writes.push_back(write);
2786
2787 // Descriptor writes for mask/UV placeholder buffers (set_geometry bindings 11-16)
2788 VkDescriptorBufferInfo mask_data_info{};
2789 mask_data_info.buffer = mask_data_buffer.buffer;
2790 mask_data_info.offset = 0;
2791 mask_data_info.range = VK_WHOLE_SIZE;
2792
2793 VkDescriptorBufferInfo mask_sizes_info{};
2794 mask_sizes_info.buffer = mask_sizes_buffer.buffer;
2795 mask_sizes_info.offset = 0;
2796 mask_sizes_info.range = VK_WHOLE_SIZE;
2797
2798 VkDescriptorBufferInfo mask_offsets_info{};
2799 mask_offsets_info.buffer = mask_offsets_buffer.buffer;
2800 mask_offsets_info.offset = 0;
2801 mask_offsets_info.range = VK_WHOLE_SIZE;
2802
2803 VkDescriptorBufferInfo mask_IDs_info{};
2804 mask_IDs_info.buffer = mask_IDs_buffer.buffer;
2805 mask_IDs_info.offset = 0;
2806 mask_IDs_info.range = VK_WHOLE_SIZE;
2807
2808 VkDescriptorBufferInfo uv_data_info{};
2809 uv_data_info.buffer = uv_data_buffer.buffer;
2810 uv_data_info.offset = 0;
2811 uv_data_info.range = VK_WHOLE_SIZE;
2812
2813 VkDescriptorBufferInfo uv_IDs_info{};
2814 uv_IDs_info.buffer = uv_IDs_buffer.buffer;
2815 uv_IDs_info.offset = 0;
2816 uv_IDs_info.range = VK_WHOLE_SIZE;
2817
2818 write.dstSet = set_geometry;
2819 write.dstBinding = 11;
2820 write.pBufferInfo = &mask_data_info;
2821 descriptor_writes.push_back(write);
2822
2823 write.dstBinding = 12;
2824 write.pBufferInfo = &mask_sizes_info;
2825 descriptor_writes.push_back(write);
2826
2827 write.dstBinding = 13;
2828 write.pBufferInfo = &mask_offsets_info;
2829 descriptor_writes.push_back(write);
2830
2831 write.dstBinding = 14;
2832 write.pBufferInfo = &mask_IDs_info;
2833 descriptor_writes.push_back(write);
2834
2835 write.dstBinding = 15;
2836 write.pBufferInfo = &uv_data_info;
2837 descriptor_writes.push_back(write);
2838
2839 write.dstBinding = 16;
2840 write.pBufferInfo = &uv_IDs_info;
2841 descriptor_writes.push_back(write);
2842
2843 VkDescriptorBufferInfo bbox_verts_info{};
2844 bbox_verts_info.buffer = bbox_vertices_buffer.buffer;
2845 bbox_verts_info.offset = 0;
2846 bbox_verts_info.range = VK_WHOLE_SIZE;
2847
2848 write.dstBinding = 17;
2849 write.pBufferInfo = &bbox_verts_info;
2850 descriptor_writes.push_back(write);
2851
2852 vkUpdateDescriptorSets(vk_device, static_cast<uint32_t>(descriptor_writes.size()), descriptor_writes.data(), 0, nullptr);
2853 }
2854
2855 void VulkanComputeBackend::createPipelines() {
2856 VkDevice vk_device = device->getDevice();
2857
2858 // ========== Create Pipeline Layout ==========
2859
2860 VkDescriptorSetLayout set_layouts[] = {set_layout_geometry, set_layout_materials, set_layout_results, set_layout_sky, set_layout_debug};
2861
2862 // Push constants (128 bytes max for MoltenVK compatibility)
2863 const uint32_t push_constant_size = 128;
2864
2865 // Validate against device limits
2866 const VkPhysicalDeviceProperties &props = device->getDeviceProperties();
2867 if (push_constant_size > props.limits.maxPushConstantsSize) {
2868 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Push constant size (" + std::to_string(push_constant_size) + " bytes) exceeds device limit (" + std::to_string(props.limits.maxPushConstantsSize) + " bytes)");
2869 }
2870
2871 VkPushConstantRange push_constant_range{};
2872 push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
2873 push_constant_range.offset = 0;
2874 push_constant_range.size = push_constant_size;
2875
2876 VkPipelineLayoutCreateInfo pipeline_layout_info{};
2877 pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
2878 pipeline_layout_info.setLayoutCount = 5; // geometry, materials, results, sky, debug
2879 pipeline_layout_info.pSetLayouts = set_layouts;
2880 pipeline_layout_info.pushConstantRangeCount = 1;
2881 pipeline_layout_info.pPushConstantRanges = &push_constant_range;
2882
2883 if (vkCreatePipelineLayout(vk_device, &pipeline_layout_info, nullptr, &pipeline_layout) != VK_SUCCESS) {
2884 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Failed to create pipeline layout");
2885 }
2886
2887 // ========== Load Shaders ==========
2888
2889 // Shader paths (relative to build directory)
2890 std::string shader_dir = "plugins/radiation/";
2891
2892 VkShaderModule shader_direct = loadShader(shader_dir + "direct_raygen.spv");
2893 VkShaderModule shader_diffuse = loadShader(shader_dir + "diffuse_raygen.spv");
2894 VkShaderModule shader_camera = loadShader(shader_dir + "camera_raygen.spv");
2895 VkShaderModule shader_pixel_label = loadShader(shader_dir + "pixel_label_raygen.spv");
2896
2897 // ========== Create Compute Pipelines ==========
2898
2899 VkComputePipelineCreateInfo pipeline_info{};
2900 pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
2901 pipeline_info.layout = pipeline_layout;
2902
2903 // Direct ray pipeline
2904 pipeline_info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
2905 pipeline_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
2906 pipeline_info.stage.module = shader_direct;
2907 pipeline_info.stage.pName = "main";
2908
2909 if (vkCreateComputePipelines(vk_device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline_direct) != VK_SUCCESS) {
2910 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Failed to create direct ray pipeline");
2911 }
2912
2913 // Diffuse ray pipeline
2914 pipeline_info.stage.module = shader_diffuse;
2915 if (vkCreateComputePipelines(vk_device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline_diffuse) != VK_SUCCESS) {
2916 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Failed to create diffuse ray pipeline");
2917 }
2918
2919 // Camera ray pipeline
2920 pipeline_info.stage.module = shader_camera;
2921 if (vkCreateComputePipelines(vk_device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline_camera) != VK_SUCCESS) {
2922 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Failed to create camera ray pipeline");
2923 }
2924
2925 // Pixel label pipeline
2926 pipeline_info.stage.module = shader_pixel_label;
2927 if (vkCreateComputePipelines(vk_device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline_pixel_label) != VK_SUCCESS) {
2928 helios_runtime_error("ERROR (VulkanComputeBackend::createPipelines): Failed to create pixel label pipeline");
2929 }
2930
2931 // Cleanup shader modules (no longer needed after pipeline creation)
2932 vkDestroyShaderModule(vk_device, shader_direct, nullptr);
2933 vkDestroyShaderModule(vk_device, shader_diffuse, nullptr);
2934 vkDestroyShaderModule(vk_device, shader_camera, nullptr);
2935 vkDestroyShaderModule(vk_device, shader_pixel_label, nullptr);
2936 }
2937
2938 VkShaderModule VulkanComputeBackend::loadShader(const std::string &filename) {
2939 // Read SPIR-V file
2940 std::ifstream file(filename, std::ios::binary | std::ios::ate);
2941 if (!file.is_open()) {
2942 helios_runtime_error("ERROR (VulkanComputeBackend::loadShader): Failed to open shader file: " + filename);
2943 }
2944
2945 size_t file_size = file.tellg();
2946 if (file_size == 0) {
2947 helios_runtime_error("ERROR (VulkanComputeBackend::loadShader): Shader file is empty: " + filename);
2948 }
2949 if (file_size % 4 != 0) {
2950 helios_runtime_error("ERROR (VulkanComputeBackend::loadShader): Invalid SPIR-V file size (not multiple of 4 bytes): " + filename);
2951 }
2952
2953 std::vector<uint32_t> code(file_size / 4);
2954 file.seekg(0);
2955 file.read(reinterpret_cast<char *>(code.data()), file_size);
2956 file.close();
2957
2958 // Validate SPIR-V magic number (0x07230203)
2959 if (code.empty() || code[0] != 0x07230203) {
2960 helios_runtime_error("ERROR (VulkanComputeBackend::loadShader): Invalid SPIR-V magic number in: " + filename + ". Expected 0x07230203, got 0x" + std::to_string(code.empty() ? 0 : code[0]));
2961 }
2962
2963 // Create shader module
2964 VkShaderModuleCreateInfo create_info{};
2965 create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
2966 create_info.codeSize = code.size() * sizeof(uint32_t);
2967 create_info.pCode = code.data();
2968
2969 VkShaderModule shader_module;
2970 VkResult result = vkCreateShaderModule(device->getDevice(), &create_info, nullptr, &shader_module);
2971 if (result != VK_SUCCESS) {
2972 helios_runtime_error("ERROR (VulkanComputeBackend::loadShader): Failed to create shader module from: " + filename + " (VkResult: " + std::to_string(result) + ")");
2973 }
2974
2975 return shader_module;
2976 }
2977
2978 void VulkanComputeBackend::updateDescriptorSets() {
2979 VkDevice vk_device = device->getDevice();
2980
2981 std::vector<VkWriteDescriptorSet> descriptor_writes;
2982
2983 // ========== Set 0: Geometry Buffers ==========
2984
2985 VkDescriptorBufferInfo bvh_info{};
2986 bvh_info.buffer = bvh_buffer.buffer;
2987 bvh_info.offset = 0;
2988 bvh_info.range = VK_WHOLE_SIZE;
2989
2990 VkDescriptorBufferInfo prim_indices_info{};
2991 prim_indices_info.buffer = primitive_indices_buffer.buffer;
2992 prim_indices_info.offset = 0;
2993 prim_indices_info.range = VK_WHOLE_SIZE;
2994
2995 VkDescriptorBufferInfo transform_info{};
2996 transform_info.buffer = transform_matrices_buffer.buffer;
2997 transform_info.offset = 0;
2998 transform_info.range = VK_WHOLE_SIZE;
2999
3000 VkDescriptorBufferInfo prim_types_info{};
3001 prim_types_info.buffer = primitive_types_buffer.buffer;
3002 prim_types_info.offset = 0;
3003 prim_types_info.range = VK_WHOLE_SIZE;
3004
3005 VkDescriptorBufferInfo prim_uuids_info{};
3006 prim_uuids_info.buffer = primitive_uuids_buffer.buffer;
3007 prim_uuids_info.offset = 0;
3008 prim_uuids_info.range = VK_WHOLE_SIZE;
3009
3010 VkDescriptorBufferInfo prim_positions_info{};
3011 prim_positions_info.buffer = primitive_positions_buffer.buffer;
3012 prim_positions_info.offset = 0;
3013 prim_positions_info.range = VK_WHOLE_SIZE;
3014
3015 VkDescriptorBufferInfo obj_subdivisions_info{};
3016 obj_subdivisions_info.buffer = object_subdivisions_buffer.buffer;
3017 obj_subdivisions_info.offset = 0;
3018 obj_subdivisions_info.range = VK_WHOLE_SIZE;
3019
3020 VkDescriptorBufferInfo twosided_info{};
3021 twosided_info.buffer = twosided_flag_buffer.buffer;
3022 twosided_info.offset = 0;
3023 twosided_info.range = VK_WHOLE_SIZE;
3024
3025 VkDescriptorBufferInfo patch_vertices_info{};
3026 patch_vertices_info.buffer = patch_vertices_buffer.buffer;
3027 patch_vertices_info.offset = 0;
3028 patch_vertices_info.range = VK_WHOLE_SIZE;
3029
3030 VkDescriptorBufferInfo triangle_vertices_info{};
3031 triangle_vertices_info.buffer = triangle_vertices_buffer.buffer;
3032 triangle_vertices_info.offset = 0;
3033 triangle_vertices_info.range = VK_WHOLE_SIZE;
3034
3035 VkDescriptorBufferInfo normal_info{};
3036 normal_info.buffer = normal_buffer.buffer;
3037 normal_info.offset = 0;
3038 normal_info.range = VK_WHOLE_SIZE;
3039
3040 // Only add descriptor writes for non-null buffers
3041 if (bvh_buffer.buffer != VK_NULL_HANDLE) {
3042 VkWriteDescriptorSet write{};
3043 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3044 write.dstSet = set_geometry;
3045 write.dstBinding = 0;
3046 write.dstArrayElement = 0;
3047 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3048 write.descriptorCount = 1;
3049 write.pBufferInfo = &bvh_info;
3050 descriptor_writes.push_back(write);
3051 }
3052
3053 if (primitive_indices_buffer.buffer != VK_NULL_HANDLE) {
3054 VkWriteDescriptorSet write{};
3055 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3056 write.dstSet = set_geometry;
3057 write.dstBinding = 1;
3058 write.dstArrayElement = 0;
3059 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3060 write.descriptorCount = 1;
3061 write.pBufferInfo = &prim_indices_info;
3062 descriptor_writes.push_back(write);
3063 }
3064
3065 if (transform_matrices_buffer.buffer != VK_NULL_HANDLE) {
3066 VkWriteDescriptorSet write{};
3067 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3068 write.dstSet = set_geometry;
3069 write.dstBinding = 2;
3070 write.dstArrayElement = 0;
3071 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3072 write.descriptorCount = 1;
3073 write.pBufferInfo = &transform_info;
3074 descriptor_writes.push_back(write);
3075 }
3076
3077 if (primitive_types_buffer.buffer != VK_NULL_HANDLE) {
3078 VkWriteDescriptorSet write{};
3079 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3080 write.dstSet = set_geometry;
3081 write.dstBinding = 3;
3082 write.dstArrayElement = 0;
3083 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3084 write.descriptorCount = 1;
3085 write.pBufferInfo = &prim_types_info;
3086 descriptor_writes.push_back(write);
3087 }
3088
3089 if (primitive_uuids_buffer.buffer != VK_NULL_HANDLE) {
3090 VkWriteDescriptorSet write{};
3091 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3092 write.dstSet = set_geometry;
3093 write.dstBinding = 4;
3094 write.dstArrayElement = 0;
3095 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3096 write.descriptorCount = 1;
3097 write.pBufferInfo = &prim_uuids_info;
3098 descriptor_writes.push_back(write);
3099 }
3100
3101 if (primitive_positions_buffer.buffer != VK_NULL_HANDLE) {
3102 VkWriteDescriptorSet write{};
3103 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3104 write.dstSet = set_geometry;
3105 write.dstBinding = 5;
3106 write.dstArrayElement = 0;
3107 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3108 write.descriptorCount = 1;
3109 write.pBufferInfo = &prim_positions_info;
3110 descriptor_writes.push_back(write);
3111 }
3112
3113 if (object_subdivisions_buffer.buffer != VK_NULL_HANDLE) {
3114 VkWriteDescriptorSet write{};
3115 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3116 write.dstSet = set_geometry;
3117 write.dstBinding = 6;
3118 write.dstArrayElement = 0;
3119 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3120 write.descriptorCount = 1;
3121 write.pBufferInfo = &obj_subdivisions_info;
3122 descriptor_writes.push_back(write);
3123 }
3124
3125 if (twosided_flag_buffer.buffer != VK_NULL_HANDLE) {
3126 VkWriteDescriptorSet write{};
3127 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3128 write.dstSet = set_geometry;
3129 write.dstBinding = 7;
3130 write.dstArrayElement = 0;
3131 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3132 write.descriptorCount = 1;
3133 write.pBufferInfo = &twosided_info;
3134 descriptor_writes.push_back(write);
3135 }
3136
3137 if (patch_vertices_buffer.buffer != VK_NULL_HANDLE) {
3138 VkWriteDescriptorSet write{};
3139 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3140 write.dstSet = set_geometry;
3141 write.dstBinding = 8;
3142 write.dstArrayElement = 0;
3143 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3144 write.descriptorCount = 1;
3145 write.pBufferInfo = &patch_vertices_info;
3146 descriptor_writes.push_back(write);
3147 }
3148
3149 if (triangle_vertices_buffer.buffer != VK_NULL_HANDLE) {
3150 VkWriteDescriptorSet write{};
3151 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3152 write.dstSet = set_geometry;
3153 write.dstBinding = 9;
3154 write.dstArrayElement = 0;
3155 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3156 write.descriptorCount = 1;
3157 write.pBufferInfo = &triangle_vertices_info;
3158 descriptor_writes.push_back(write);
3159 }
3160
3161 if (normal_buffer.buffer != VK_NULL_HANDLE) {
3162 VkWriteDescriptorSet write{};
3163 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3164 write.dstSet = set_geometry;
3165 write.dstBinding = 10;
3166 write.dstArrayElement = 0;
3167 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3168 write.descriptorCount = 1;
3169 write.pBufferInfo = &normal_info;
3170 descriptor_writes.push_back(write);
3171 }
3172
3173 // Mask/UV texture data buffers (bindings 11-16)
3174 VkDescriptorBufferInfo mask_data_info{};
3175 mask_data_info.buffer = mask_data_buffer.buffer;
3176 mask_data_info.offset = 0;
3177 mask_data_info.range = VK_WHOLE_SIZE;
3178
3179 VkDescriptorBufferInfo mask_sizes_info{};
3180 mask_sizes_info.buffer = mask_sizes_buffer.buffer;
3181 mask_sizes_info.offset = 0;
3182 mask_sizes_info.range = VK_WHOLE_SIZE;
3183
3184 VkDescriptorBufferInfo mask_offsets_info{};
3185 mask_offsets_info.buffer = mask_offsets_buffer.buffer;
3186 mask_offsets_info.offset = 0;
3187 mask_offsets_info.range = VK_WHOLE_SIZE;
3188
3189 VkDescriptorBufferInfo mask_IDs_info{};
3190 mask_IDs_info.buffer = mask_IDs_buffer.buffer;
3191 mask_IDs_info.offset = 0;
3192 mask_IDs_info.range = VK_WHOLE_SIZE;
3193
3194 VkDescriptorBufferInfo uv_data_info{};
3195 uv_data_info.buffer = uv_data_buffer.buffer;
3196 uv_data_info.offset = 0;
3197 uv_data_info.range = VK_WHOLE_SIZE;
3198
3199 VkDescriptorBufferInfo uv_IDs_info{};
3200 uv_IDs_info.buffer = uv_IDs_buffer.buffer;
3201 uv_IDs_info.offset = 0;
3202 uv_IDs_info.range = VK_WHOLE_SIZE;
3203
3204 if (mask_data_buffer.buffer != VK_NULL_HANDLE) {
3205 VkWriteDescriptorSet write{};
3206 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3207 write.dstSet = set_geometry;
3208 write.dstBinding = 11;
3209 write.dstArrayElement = 0;
3210 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3211 write.descriptorCount = 1;
3212 write.pBufferInfo = &mask_data_info;
3213 descriptor_writes.push_back(write);
3214 }
3215
3216 if (mask_sizes_buffer.buffer != VK_NULL_HANDLE) {
3217 VkWriteDescriptorSet write{};
3218 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3219 write.dstSet = set_geometry;
3220 write.dstBinding = 12;
3221 write.dstArrayElement = 0;
3222 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3223 write.descriptorCount = 1;
3224 write.pBufferInfo = &mask_sizes_info;
3225 descriptor_writes.push_back(write);
3226 }
3227
3228 if (mask_offsets_buffer.buffer != VK_NULL_HANDLE) {
3229 VkWriteDescriptorSet write{};
3230 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3231 write.dstSet = set_geometry;
3232 write.dstBinding = 13;
3233 write.dstArrayElement = 0;
3234 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3235 write.descriptorCount = 1;
3236 write.pBufferInfo = &mask_offsets_info;
3237 descriptor_writes.push_back(write);
3238 }
3239
3240 if (mask_IDs_buffer.buffer != VK_NULL_HANDLE) {
3241 VkWriteDescriptorSet write{};
3242 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3243 write.dstSet = set_geometry;
3244 write.dstBinding = 14;
3245 write.dstArrayElement = 0;
3246 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3247 write.descriptorCount = 1;
3248 write.pBufferInfo = &mask_IDs_info;
3249 descriptor_writes.push_back(write);
3250 }
3251
3252 if (uv_data_buffer.buffer != VK_NULL_HANDLE) {
3253 VkWriteDescriptorSet write{};
3254 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3255 write.dstSet = set_geometry;
3256 write.dstBinding = 15;
3257 write.dstArrayElement = 0;
3258 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3259 write.descriptorCount = 1;
3260 write.pBufferInfo = &uv_data_info;
3261 descriptor_writes.push_back(write);
3262 }
3263
3264 if (uv_IDs_buffer.buffer != VK_NULL_HANDLE) {
3265 VkWriteDescriptorSet write{};
3266 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3267 write.dstSet = set_geometry;
3268 write.dstBinding = 16;
3269 write.dstArrayElement = 0;
3270 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3271 write.descriptorCount = 1;
3272 write.pBufferInfo = &uv_IDs_info;
3273 descriptor_writes.push_back(write);
3274 }
3275
3276 VkDescriptorBufferInfo bbox_verts_info{};
3277 bbox_verts_info.buffer = bbox_vertices_buffer.buffer;
3278 bbox_verts_info.offset = 0;
3279 bbox_verts_info.range = VK_WHOLE_SIZE;
3280
3281 if (bbox_vertices_buffer.buffer != VK_NULL_HANDLE) {
3282 VkWriteDescriptorSet write{};
3283 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3284 write.dstSet = set_geometry;
3285 write.dstBinding = 17;
3286 write.dstArrayElement = 0;
3287 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3288 write.descriptorCount = 1;
3289 write.pBufferInfo = &bbox_verts_info;
3290 descriptor_writes.push_back(write);
3291 }
3292
3293 // ========== Set 1: Material/Source Buffers ==========
3294
3295 VkDescriptorBufferInfo source_pos_info{};
3296 source_pos_info.buffer = source_positions_buffer.buffer;
3297 source_pos_info.offset = 0;
3298 source_pos_info.range = VK_WHOLE_SIZE;
3299
3300 VkDescriptorBufferInfo source_types_info{};
3301 source_types_info.buffer = source_types_buffer.buffer;
3302 source_types_info.offset = 0;
3303 source_types_info.range = VK_WHOLE_SIZE;
3304
3305 VkDescriptorBufferInfo source_rot_info{};
3306 source_rot_info.buffer = source_rotations_buffer.buffer;
3307 source_rot_info.offset = 0;
3308 source_rot_info.range = VK_WHOLE_SIZE;
3309
3310 VkDescriptorBufferInfo source_widths_info{};
3311 source_widths_info.buffer = source_widths_buffer.buffer;
3312 source_widths_info.offset = 0;
3313 source_widths_info.range = VK_WHOLE_SIZE;
3314
3315 VkDescriptorBufferInfo source_fluxes_info{};
3316 source_fluxes_info.buffer = source_fluxes_buffer.buffer;
3317 source_fluxes_info.offset = 0;
3318 source_fluxes_info.range = VK_WHOLE_SIZE;
3319
3320 VkDescriptorBufferInfo reflectivity_info{};
3321 reflectivity_info.buffer = reflectivity_buffer.buffer;
3322 reflectivity_info.offset = 0;
3323 reflectivity_info.range = VK_WHOLE_SIZE;
3324
3325 VkDescriptorBufferInfo transmissivity_info{};
3326 transmissivity_info.buffer = transmissivity_buffer.buffer;
3327 transmissivity_info.offset = 0;
3328 transmissivity_info.range = VK_WHOLE_SIZE;
3329
3330 if (source_positions_buffer.buffer != VK_NULL_HANDLE) {
3331 VkWriteDescriptorSet write{};
3332 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3333 write.dstSet = set_materials;
3334 write.dstBinding = 0;
3335 write.dstArrayElement = 0;
3336 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3337 write.descriptorCount = 1;
3338 write.pBufferInfo = &source_pos_info;
3339 descriptor_writes.push_back(write);
3340 }
3341
3342 if (source_types_buffer.buffer != VK_NULL_HANDLE) {
3343 VkWriteDescriptorSet write{};
3344 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3345 write.dstSet = set_materials;
3346 write.dstBinding = 1;
3347 write.dstArrayElement = 0;
3348 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3349 write.descriptorCount = 1;
3350 write.pBufferInfo = &source_types_info;
3351 descriptor_writes.push_back(write);
3352 }
3353
3354 if (source_rotations_buffer.buffer != VK_NULL_HANDLE) {
3355 VkWriteDescriptorSet write{};
3356 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3357 write.dstSet = set_materials;
3358 write.dstBinding = 2;
3359 write.dstArrayElement = 0;
3360 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3361 write.descriptorCount = 1;
3362 write.pBufferInfo = &source_rot_info;
3363 descriptor_writes.push_back(write);
3364 }
3365
3366 if (source_widths_buffer.buffer != VK_NULL_HANDLE) {
3367 VkWriteDescriptorSet write{};
3368 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3369 write.dstSet = set_materials;
3370 write.dstBinding = 3;
3371 write.dstArrayElement = 0;
3372 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3373 write.descriptorCount = 1;
3374 write.pBufferInfo = &source_widths_info;
3375 descriptor_writes.push_back(write);
3376 }
3377
3378 if (source_fluxes_buffer.buffer != VK_NULL_HANDLE) {
3379 VkWriteDescriptorSet write{};
3380 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3381 write.dstSet = set_materials;
3382 write.dstBinding = 4;
3383 write.dstArrayElement = 0;
3384 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3385 write.descriptorCount = 1;
3386 write.pBufferInfo = &source_fluxes_info;
3387 descriptor_writes.push_back(write);
3388 }
3389
3390 if (reflectivity_buffer.buffer != VK_NULL_HANDLE) {
3391 VkWriteDescriptorSet write{};
3392 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3393 write.dstSet = set_materials;
3394 write.dstBinding = 5;
3395 write.dstArrayElement = 0;
3396 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3397 write.descriptorCount = 1;
3398 write.pBufferInfo = &reflectivity_info;
3399 descriptor_writes.push_back(write);
3400 }
3401
3402 if (transmissivity_buffer.buffer != VK_NULL_HANDLE) {
3403 VkWriteDescriptorSet write{};
3404 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3405 write.dstSet = set_materials;
3406 write.dstBinding = 6;
3407 write.dstArrayElement = 0;
3408 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3409 write.descriptorCount = 1;
3410 write.pBufferInfo = &transmissivity_info;
3411 descriptor_writes.push_back(write);
3412 }
3413
3414 // Specular property buffers
3415 VkDescriptorBufferInfo specular_exponent_info{};
3416 specular_exponent_info.buffer = specular_exponent_buffer.buffer;
3417 specular_exponent_info.offset = 0;
3418 specular_exponent_info.range = VK_WHOLE_SIZE;
3419
3420 VkDescriptorBufferInfo specular_scale_info{};
3421 specular_scale_info.buffer = specular_scale_buffer.buffer;
3422 specular_scale_info.offset = 0;
3423 specular_scale_info.range = VK_WHOLE_SIZE;
3424
3425 VkDescriptorBufferInfo source_fluxes_cam_info{};
3426 source_fluxes_cam_info.buffer = source_fluxes_cam_buffer.buffer;
3427 source_fluxes_cam_info.offset = 0;
3428 source_fluxes_cam_info.range = VK_WHOLE_SIZE;
3429
3430 if (specular_exponent_buffer.buffer != VK_NULL_HANDLE) {
3431 VkWriteDescriptorSet write{};
3432 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3433 write.dstSet = set_materials;
3434 write.dstBinding = 7;
3435 write.dstArrayElement = 0;
3436 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3437 write.descriptorCount = 1;
3438 write.pBufferInfo = &specular_exponent_info;
3439 descriptor_writes.push_back(write);
3440 }
3441
3442 if (specular_scale_buffer.buffer != VK_NULL_HANDLE) {
3443 VkWriteDescriptorSet write{};
3444 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3445 write.dstSet = set_materials;
3446 write.dstBinding = 8;
3447 write.dstArrayElement = 0;
3448 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3449 write.descriptorCount = 1;
3450 write.pBufferInfo = &specular_scale_info;
3451 descriptor_writes.push_back(write);
3452 }
3453
3454 if (source_fluxes_cam_buffer.buffer != VK_NULL_HANDLE) {
3455 VkWriteDescriptorSet write{};
3456 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3457 write.dstSet = set_materials;
3458 write.dstBinding = 9;
3459 write.dstArrayElement = 0;
3460 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3461 write.descriptorCount = 1;
3462 write.pBufferInfo = &source_fluxes_cam_info;
3463 descriptor_writes.push_back(write);
3464 }
3465
3466 VkDescriptorBufferInfo band_map_info{};
3467 band_map_info.buffer = band_map_buffer.buffer;
3468 band_map_info.offset = 0;
3469 band_map_info.range = VK_WHOLE_SIZE;
3470
3471 if (band_map_buffer.buffer != VK_NULL_HANDLE) {
3472 VkWriteDescriptorSet write{};
3473 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3474 write.dstSet = set_materials;
3475 write.dstBinding = 10;
3476 write.dstArrayElement = 0;
3477 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3478 write.descriptorCount = 1;
3479 write.pBufferInfo = &band_map_info;
3480 descriptor_writes.push_back(write);
3481 }
3482
3483 // ========== Set 2: Result Buffers ==========
3484
3485 VkDescriptorBufferInfo rad_in_info{};
3486 rad_in_info.buffer = radiation_in_buffer.buffer;
3487 rad_in_info.offset = 0;
3488 rad_in_info.range = VK_WHOLE_SIZE;
3489
3490 VkDescriptorBufferInfo rad_out_top_info{};
3491 rad_out_top_info.buffer = radiation_out_top_buffer.buffer;
3492 rad_out_top_info.offset = 0;
3493 rad_out_top_info.range = VK_WHOLE_SIZE;
3494
3495 VkDescriptorBufferInfo rad_out_bottom_info{};
3496 rad_out_bottom_info.buffer = radiation_out_bottom_buffer.buffer;
3497 rad_out_bottom_info.offset = 0;
3498 rad_out_bottom_info.range = VK_WHOLE_SIZE;
3499
3500 VkDescriptorBufferInfo scatter_top_info{};
3501 scatter_top_info.buffer = scatter_top_buffer.buffer;
3502 scatter_top_info.offset = 0;
3503 scatter_top_info.range = VK_WHOLE_SIZE;
3504
3505 VkDescriptorBufferInfo scatter_bottom_info{};
3506 scatter_bottom_info.buffer = scatter_bottom_buffer.buffer;
3507 scatter_bottom_info.offset = 0;
3508 scatter_bottom_info.range = VK_WHOLE_SIZE;
3509
3510 if (radiation_in_buffer.buffer != VK_NULL_HANDLE) {
3511 VkWriteDescriptorSet write{};
3512 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3513 write.dstSet = set_results;
3514 write.dstBinding = 0;
3515 write.dstArrayElement = 0;
3516 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3517 write.descriptorCount = 1;
3518 write.pBufferInfo = &rad_in_info;
3519 descriptor_writes.push_back(write);
3520 }
3521
3522 if (radiation_out_top_buffer.buffer != VK_NULL_HANDLE) {
3523 VkWriteDescriptorSet write{};
3524 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3525 write.dstSet = set_results;
3526 write.dstBinding = 1;
3527 write.dstArrayElement = 0;
3528 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3529 write.descriptorCount = 1;
3530 write.pBufferInfo = &rad_out_top_info;
3531 descriptor_writes.push_back(write);
3532 } else {
3533 // radiation_out_top_buffer not yet allocated; descriptor update skipped
3534 }
3535
3536 if (radiation_out_bottom_buffer.buffer != VK_NULL_HANDLE) {
3537 VkWriteDescriptorSet write{};
3538 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3539 write.dstSet = set_results;
3540 write.dstBinding = 2;
3541 write.dstArrayElement = 0;
3542 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3543 write.descriptorCount = 1;
3544 write.pBufferInfo = &rad_out_bottom_info;
3545 descriptor_writes.push_back(write);
3546 }
3547
3548 if (scatter_top_buffer.buffer != VK_NULL_HANDLE) {
3549 VkWriteDescriptorSet write{};
3550 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3551 write.dstSet = set_results;
3552 write.dstBinding = 3;
3553 write.dstArrayElement = 0;
3554 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3555 write.descriptorCount = 1;
3556 write.pBufferInfo = &scatter_top_info;
3557 descriptor_writes.push_back(write);
3558 }
3559
3560 if (scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
3561 VkWriteDescriptorSet write{};
3562 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3563 write.dstSet = set_results;
3564 write.dstBinding = 4;
3565 write.dstArrayElement = 0;
3566 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3567 write.descriptorCount = 1;
3568 write.pBufferInfo = &scatter_bottom_info;
3569 descriptor_writes.push_back(write);
3570 }
3571
3572 // Camera result buffers (bindings 5-9)
3573 VkDescriptorBufferInfo camera_radiation_info{};
3574 camera_radiation_info.buffer = camera_radiation_buffer.buffer;
3575 camera_radiation_info.offset = 0;
3576 camera_radiation_info.range = VK_WHOLE_SIZE;
3577
3578 VkDescriptorBufferInfo camera_pixel_label_info{};
3579 camera_pixel_label_info.buffer = camera_pixel_label_buffer.buffer;
3580 camera_pixel_label_info.offset = 0;
3581 camera_pixel_label_info.range = VK_WHOLE_SIZE;
3582
3583 VkDescriptorBufferInfo camera_pixel_depth_info{};
3584 camera_pixel_depth_info.buffer = camera_pixel_depth_buffer.buffer;
3585 camera_pixel_depth_info.offset = 0;
3586 camera_pixel_depth_info.range = VK_WHOLE_SIZE;
3587
3588 VkDescriptorBufferInfo camera_scatter_top_info{};
3589 camera_scatter_top_info.buffer = camera_scatter_top_buffer.buffer;
3590 camera_scatter_top_info.offset = 0;
3591 camera_scatter_top_info.range = VK_WHOLE_SIZE;
3592
3593 VkDescriptorBufferInfo camera_scatter_bottom_info{};
3594 camera_scatter_bottom_info.buffer = camera_scatter_bottom_buffer.buffer;
3595 camera_scatter_bottom_info.offset = 0;
3596 camera_scatter_bottom_info.range = VK_WHOLE_SIZE;
3597
3598 if (camera_radiation_buffer.buffer != VK_NULL_HANDLE) {
3599 VkWriteDescriptorSet write{};
3600 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3601 write.dstSet = set_results;
3602 write.dstBinding = 5;
3603 write.dstArrayElement = 0;
3604 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3605 write.descriptorCount = 1;
3606 write.pBufferInfo = &camera_radiation_info;
3607 descriptor_writes.push_back(write);
3608 }
3609
3610 if (camera_pixel_label_buffer.buffer != VK_NULL_HANDLE) {
3611 VkWriteDescriptorSet write{};
3612 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3613 write.dstSet = set_results;
3614 write.dstBinding = 6;
3615 write.dstArrayElement = 0;
3616 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3617 write.descriptorCount = 1;
3618 write.pBufferInfo = &camera_pixel_label_info;
3619 descriptor_writes.push_back(write);
3620 }
3621
3622 if (camera_pixel_depth_buffer.buffer != VK_NULL_HANDLE) {
3623 VkWriteDescriptorSet write{};
3624 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3625 write.dstSet = set_results;
3626 write.dstBinding = 7;
3627 write.dstArrayElement = 0;
3628 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3629 write.descriptorCount = 1;
3630 write.pBufferInfo = &camera_pixel_depth_info;
3631 descriptor_writes.push_back(write);
3632 }
3633
3634 if (camera_scatter_top_buffer.buffer != VK_NULL_HANDLE) {
3635 VkWriteDescriptorSet write{};
3636 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3637 write.dstSet = set_results;
3638 write.dstBinding = 8;
3639 write.dstArrayElement = 0;
3640 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3641 write.descriptorCount = 1;
3642 write.pBufferInfo = &camera_scatter_top_info;
3643 descriptor_writes.push_back(write);
3644 }
3645
3646 if (camera_scatter_bottom_buffer.buffer != VK_NULL_HANDLE) {
3647 VkWriteDescriptorSet write{};
3648 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3649 write.dstSet = set_results;
3650 write.dstBinding = 9;
3651 write.dstArrayElement = 0;
3652 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3653 write.descriptorCount = 1;
3654 write.pBufferInfo = &camera_scatter_bottom_info;
3655 descriptor_writes.push_back(write);
3656 }
3657
3658 VkDescriptorBufferInfo radiation_specular_info{};
3659 radiation_specular_info.buffer = radiation_specular_buffer.buffer;
3660 radiation_specular_info.offset = 0;
3661 radiation_specular_info.range = VK_WHOLE_SIZE;
3662
3663 if (radiation_specular_buffer.buffer != VK_NULL_HANDLE) {
3664 VkWriteDescriptorSet write{};
3665 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3666 write.dstSet = set_results;
3667 write.dstBinding = 10;
3668 write.dstArrayElement = 0;
3669 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3670 write.descriptorCount = 1;
3671 write.pBufferInfo = &radiation_specular_info;
3672 descriptor_writes.push_back(write);
3673 }
3674
3675 // ========== Set 3: Sky Parameters ==========
3676
3677 VkDescriptorBufferInfo diffuse_flux_info{};
3678 diffuse_flux_info.buffer = diffuse_flux_buffer.buffer;
3679 diffuse_flux_info.offset = 0;
3680 diffuse_flux_info.range = VK_WHOLE_SIZE;
3681
3682 VkDescriptorBufferInfo diffuse_peak_dir_info{};
3683 diffuse_peak_dir_info.buffer = diffuse_peak_dir_buffer.buffer;
3684 diffuse_peak_dir_info.offset = 0;
3685 diffuse_peak_dir_info.range = VK_WHOLE_SIZE;
3686
3687 VkDescriptorBufferInfo diffuse_extinction_info{};
3688 diffuse_extinction_info.buffer = diffuse_extinction_buffer.buffer;
3689 diffuse_extinction_info.offset = 0;
3690 diffuse_extinction_info.range = VK_WHOLE_SIZE;
3691
3692 VkDescriptorBufferInfo diffuse_dist_norm_info{};
3693 diffuse_dist_norm_info.buffer = diffuse_dist_norm_buffer.buffer;
3694 diffuse_dist_norm_info.offset = 0;
3695 diffuse_dist_norm_info.range = VK_WHOLE_SIZE;
3696
3697 VkDescriptorBufferInfo sky_radiance_params_info{};
3698 sky_radiance_params_info.buffer = sky_radiance_params_buffer.buffer;
3699 sky_radiance_params_info.offset = 0;
3700 sky_radiance_params_info.range = VK_WHOLE_SIZE;
3701
3702 if (diffuse_flux_buffer.buffer != VK_NULL_HANDLE) {
3703 VkWriteDescriptorSet write{};
3704 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3705 write.dstSet = set_sky;
3706 write.dstBinding = 0;
3707 write.dstArrayElement = 0;
3708 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3709 write.descriptorCount = 1;
3710 write.pBufferInfo = &diffuse_flux_info;
3711 descriptor_writes.push_back(write);
3712 }
3713
3714 if (diffuse_peak_dir_buffer.buffer != VK_NULL_HANDLE) {
3715 VkWriteDescriptorSet write{};
3716 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3717 write.dstSet = set_sky;
3718 write.dstBinding = 1;
3719 write.dstArrayElement = 0;
3720 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3721 write.descriptorCount = 1;
3722 write.pBufferInfo = &diffuse_peak_dir_info;
3723 descriptor_writes.push_back(write);
3724 }
3725
3726 if (diffuse_extinction_buffer.buffer != VK_NULL_HANDLE) {
3727 VkWriteDescriptorSet write{};
3728 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3729 write.dstSet = set_sky;
3730 write.dstBinding = 2;
3731 write.dstArrayElement = 0;
3732 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3733 write.descriptorCount = 1;
3734 write.pBufferInfo = &diffuse_extinction_info;
3735 descriptor_writes.push_back(write);
3736 }
3737
3738 if (diffuse_dist_norm_buffer.buffer != VK_NULL_HANDLE) {
3739 VkWriteDescriptorSet write{};
3740 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3741 write.dstSet = set_sky;
3742 write.dstBinding = 3;
3743 write.dstArrayElement = 0;
3744 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3745 write.descriptorCount = 1;
3746 write.pBufferInfo = &diffuse_dist_norm_info;
3747 descriptor_writes.push_back(write);
3748 }
3749
3750 if (sky_radiance_params_buffer.buffer != VK_NULL_HANDLE) {
3751 VkWriteDescriptorSet write{};
3752 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3753 write.dstSet = set_sky;
3754 write.dstBinding = 4;
3755 write.dstArrayElement = 0;
3756 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3757 write.descriptorCount = 1;
3758 write.pBufferInfo = &sky_radiance_params_info;
3759 descriptor_writes.push_back(write);
3760 }
3761
3762 // Camera sky buffers (bindings 5-6)
3763 VkDescriptorBufferInfo camera_sky_radiance_info{};
3764 camera_sky_radiance_info.buffer = camera_sky_radiance_buffer.buffer;
3765 camera_sky_radiance_info.offset = 0;
3766 camera_sky_radiance_info.range = VK_WHOLE_SIZE;
3767
3768 VkDescriptorBufferInfo solar_disk_radiance_info{};
3769 solar_disk_radiance_info.buffer = solar_disk_radiance_buffer.buffer;
3770 solar_disk_radiance_info.offset = 0;
3771 solar_disk_radiance_info.range = VK_WHOLE_SIZE;
3772
3773 if (camera_sky_radiance_buffer.buffer != VK_NULL_HANDLE) {
3774 VkWriteDescriptorSet write{};
3775 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3776 write.dstSet = set_sky;
3777 write.dstBinding = 5;
3778 write.dstArrayElement = 0;
3779 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3780 write.descriptorCount = 1;
3781 write.pBufferInfo = &camera_sky_radiance_info;
3782 descriptor_writes.push_back(write);
3783 }
3784
3785 if (solar_disk_radiance_buffer.buffer != VK_NULL_HANDLE) {
3786 VkWriteDescriptorSet write{};
3787 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3788 write.dstSet = set_sky;
3789 write.dstBinding = 6;
3790 write.dstArrayElement = 0;
3791 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3792 write.descriptorCount = 1;
3793 write.pBufferInfo = &solar_disk_radiance_info;
3794 descriptor_writes.push_back(write);
3795 }
3796
3797 // ========== Set 4: Debug Counters ==========
3798
3799 VkDescriptorBufferInfo debug_counters_info{};
3800 debug_counters_info.buffer = debug_counters_buffer.buffer;
3801 debug_counters_info.offset = 0;
3802 debug_counters_info.range = VK_WHOLE_SIZE;
3803
3804 if (debug_counters_buffer.buffer != VK_NULL_HANDLE) {
3805 VkWriteDescriptorSet write{};
3806 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
3807 write.dstSet = set_debug;
3808 write.dstBinding = 0;
3809 write.dstArrayElement = 0;
3810 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3811 write.descriptorCount = 1;
3812 write.pBufferInfo = &debug_counters_info;
3813 descriptor_writes.push_back(write);
3814 }
3815
3816 // Apply all descriptor writes
3817 if (!descriptor_writes.empty()) {
3818 vkUpdateDescriptorSets(vk_device, static_cast<uint32_t>(descriptor_writes.size()), descriptor_writes.data(), 0, nullptr);
3819 }
3820 }
3821
3822} // namespace helios