1.3.72
 
Loading...
Searching...
No Matches
VulkanDevice.cpp
Go to the documentation of this file.
1
16#include "VulkanDevice.h"
17#include <algorithm>
18#include <cstring>
19#include <iostream>
20
21// Suppress nullability warnings from VMA header on macOS
22#ifdef __clang__
23#pragma clang diagnostic push
24#pragma clang diagnostic ignored "-Wnullability-completeness"
25#endif
26#define VMA_IMPLEMENTATION
27#include <vk_mem_alloc.h>
28#ifdef __clang__
29#pragma clang diagnostic pop
30#endif
31
32namespace helios {
33
34 // ========== Debug messenger callback ==========
35
36 static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) {
37 std::string severity_str;
38 if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
39 severity_str = "ERROR";
40 } else if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
41 severity_str = "WARNING";
42 } else if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
43 severity_str = "INFO";
44 } else {
45 severity_str = "VERBOSE";
46 }
47
48 // Use std::cout instead of std::cerr (Helios convention)
49 std::cout << "[Vulkan " << severity_str << "] " << pCallbackData->pMessage << std::endl;
50
51 return VK_FALSE;
52 }
53
54 // ========== VulkanDevice implementation ==========
55
56 VulkanDevice::VulkanDevice() = default;
57
58 VulkanDevice::~VulkanDevice() {
59 shutdown();
60 }
61
62 void VulkanDevice::initialize(bool enable_validation) {
63 createInstance(enable_validation);
64 selectPhysicalDevice();
65 createLogicalDevice();
66 createAllocator();
67
68 detectMoltenVK();
69 detectAtomicFloat();
70
72
73 // Verify actual GPU execution works (catches MoltenVK on headless CI runners)
75 }
76
78 if (allocator != VK_NULL_HANDLE) {
79 vmaDestroyAllocator(allocator);
80 allocator = VK_NULL_HANDLE;
81 }
82
83 if (device != VK_NULL_HANDLE) {
84 vkDestroyDevice(device, nullptr);
85 device = VK_NULL_HANDLE;
86 }
87
88 if (debug_messenger != VK_NULL_HANDLE) {
89 auto func = (PFN_vkDestroyDebugUtilsMessengerEXT) vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT");
90 if (func != nullptr) {
91 func(instance, debug_messenger, nullptr);
92 }
93 debug_messenger = VK_NULL_HANDLE;
94 }
95
96 if (instance != VK_NULL_HANDLE) {
97 vkDestroyInstance(instance, nullptr);
98 instance = VK_NULL_HANDLE;
99 }
100 }
101
102 void VulkanDevice::createInstance(bool enable_validation) {
103 VkApplicationInfo app_info{};
104 app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
105 app_info.pApplicationName = "Helios Radiation Plugin";
106 app_info.applicationVersion = VK_MAKE_VERSION(1, 3, 0);
107 app_info.pEngineName = "Helios";
108 app_info.engineVersion = VK_MAKE_VERSION(1, 3, 0);
109 app_info.apiVersion = VK_API_VERSION_1_1;
110
111 VkInstanceCreateInfo create_info{};
112 create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
113 create_info.pApplicationInfo = &app_info;
114
115 // Required extensions (none for headless compute)
116 std::vector<const char *> extensions;
117
118 // MoltenVK requires VK_KHR_portability_enumeration on macOS
119#ifdef __APPLE__
120 extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
121 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
122 create_info.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
123#endif
124
125 // Try with validation first if requested
126 std::vector<const char *> validation_layers;
127 bool validation_enabled = false;
128
129 if (enable_validation) {
130 validation_layers.push_back("VK_LAYER_KHRONOS_validation");
131 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
132
133 create_info.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
134 create_info.ppEnabledExtensionNames = extensions.data();
135 create_info.enabledLayerCount = static_cast<uint32_t>(validation_layers.size());
136 create_info.ppEnabledLayerNames = validation_layers.data();
137
138 VkResult result = vkCreateInstance(&create_info, nullptr, &instance);
139
140 if (result == VK_SUCCESS) {
141 validation_enabled = true;
142 } else if (result == VK_ERROR_LAYER_NOT_PRESENT || result == VK_ERROR_EXTENSION_NOT_PRESENT) {
143 // Validation not available, try without it
144 extensions.pop_back(); // Remove debug utils extension
145 } else {
146 // Other error - fail
147 helios_runtime_error("ERROR (VulkanDevice::createInstance): Failed to create Vulkan instance. "
148 "Make sure Vulkan SDK is installed. VkResult code: " +
149 std::to_string(result));
150 }
151 }
152
153 // Create instance without validation if not already created
154 if (instance == VK_NULL_HANDLE) {
155 create_info.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
156 create_info.ppEnabledExtensionNames = extensions.data();
157 create_info.enabledLayerCount = 0;
158 create_info.ppEnabledLayerNames = nullptr;
159
160 VkResult result = vkCreateInstance(&create_info, nullptr, &instance);
161 if (result != VK_SUCCESS) {
162 helios_runtime_error("ERROR (VulkanDevice::createInstance): Failed to create Vulkan instance. "
163 "Make sure Vulkan SDK is installed. VkResult code: " +
164 std::to_string(result));
165 }
166 }
167
168 if (validation_enabled) {
169 setupDebugMessenger();
170 }
171 }
172
173 void VulkanDevice::setupDebugMessenger() {
174 VkDebugUtilsMessengerCreateInfoEXT create_info{};
175 create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
176 create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
177 create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
178 create_info.pfnUserCallback = debugCallback;
179
180 auto func = (PFN_vkCreateDebugUtilsMessengerEXT) vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT");
181 if (func != nullptr) {
182 func(instance, &create_info, nullptr, &debug_messenger);
183 }
184 }
185
186 void VulkanDevice::selectPhysicalDevice() {
187 uint32_t device_count = 0;
188 VkResult result = vkEnumeratePhysicalDevices(instance, &device_count, nullptr);
189
190 // Check for enumeration errors first
191 if (result != VK_SUCCESS) {
192 std::stringstream error_msg;
193 error_msg << "ERROR (VulkanDevice::selectPhysicalDevice): "
194 << "Failed to enumerate physical devices (VkResult: " << result << ")\n"
195 << "This indicates a driver installation or compatibility issue.";
196 helios_runtime_error(error_msg.str());
197 }
198
199 if (device_count == 0) {
200 // Provide detailed diagnostic information
201 std::stringstream diagnostic;
202 diagnostic << "ERROR (VulkanDevice::selectPhysicalDevice): "
203 << "No Vulkan-capable GPU found.\n\n";
204
205 // Show environment state
206 const char *icd_filenames = std::getenv("VK_ICD_FILENAMES");
207 const char *loader_debug = std::getenv("VK_LOADER_DEBUG");
208
209 diagnostic << "=== Environment ===\n";
210 diagnostic << "VK_ICD_FILENAMES: " << (icd_filenames ? icd_filenames : "(using system default)") << "\n";
211 diagnostic << "VK_LOADER_DEBUG: " << (loader_debug ? loader_debug : "(not set)") << "\n\n";
212
213 diagnostic << "=== Possible Causes ===\n";
214 diagnostic << "1. Vulkan driver library version mismatch (common with mixed compiler environments)\n";
215 diagnostic << "2. Incompatible ICD files failing to load (check for GLIBCXX errors)\n";
216 diagnostic << "3. GPU does not support Vulkan 1.1 or higher\n";
217 diagnostic << "4. Vulkan loader contamination from failed ICD loading attempts\n\n";
218
219 diagnostic << "=== Troubleshooting Steps ===\n";
220 diagnostic << "1. Check driver loading:\n";
221 diagnostic << " ldd /usr/lib/x86_64-linux-gnu/libvulkan_*.so\n";
222 diagnostic << "2. Debug Vulkan loader:\n";
223 diagnostic << " VK_LOADER_DEBUG=all vulkaninfo --summary\n";
224 diagnostic << "3. Filter working ICDs:\n";
225 diagnostic << " export VK_ICD_FILENAMES=/path/to/working_icd.json\n";
226 diagnostic << "4. Verify GPU Vulkan support:\n";
227 diagnostic << " vulkaninfo | grep 'apiVersion'\n";
228
229 helios_runtime_error(diagnostic.str());
230 }
231
232 std::vector<VkPhysicalDevice> devices(device_count);
233 vkEnumeratePhysicalDevices(instance, &device_count, devices.data());
234
235 // Score devices and select the best one
236 uint32_t best_score = 0;
237 VkPhysicalDevice best_device = VK_NULL_HANDLE;
238
239 for (const auto &device_candidate: devices) {
240 if (!isDeviceSuitable(device_candidate)) {
241 continue;
242 }
243
244 uint32_t score = scorePhysicalDevice(device_candidate);
245 if (score > best_score) {
246 best_score = score;
247 best_device = device_candidate;
248 }
249 }
250
251 if (best_device == VK_NULL_HANDLE) {
252 helios_runtime_error("ERROR (VulkanDevice::selectPhysicalDevice): No suitable Vulkan device found. "
253 "GPU must support compute queues and Vulkan 1.1.");
254 }
255
256 physical_device = best_device;
257 vkGetPhysicalDeviceProperties(physical_device, &device_properties);
258 }
259
260 bool VulkanDevice::isDeviceSuitable(VkPhysicalDevice device_candidate) const {
261 // Must have compute queue
262 uint32_t queue_family = findComputeQueueFamily(device_candidate);
263 if (queue_family == UINT32_MAX) {
264 return false;
265 }
266
267 // Must support Vulkan 1.1 minimum
268 VkPhysicalDeviceProperties props;
269 vkGetPhysicalDeviceProperties(device_candidate, &props);
270 if (props.apiVersion < VK_API_VERSION_1_1) {
271 return false;
272 }
273
274 return true;
275 }
276
277 uint32_t VulkanDevice::scorePhysicalDevice(VkPhysicalDevice device_candidate) const {
278 VkPhysicalDeviceProperties props;
279 vkGetPhysicalDeviceProperties(device_candidate, &props);
280
281 uint32_t score = 0;
282
283 // Discrete GPUs have a huge performance advantage
284 if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
285 score += 10000;
286 } else if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) {
287 score += 1000;
288 }
289
290 // Prefer newer Vulkan API versions
291 score += VK_VERSION_MAJOR(props.apiVersion) * 100 + VK_VERSION_MINOR(props.apiVersion) * 10;
292
293 return score;
294 }
295
296 uint32_t VulkanDevice::findComputeQueueFamily(VkPhysicalDevice device_candidate) const {
297 uint32_t queue_family_count = 0;
298 vkGetPhysicalDeviceQueueFamilyProperties(device_candidate, &queue_family_count, nullptr);
299
300 std::vector<VkQueueFamilyProperties> queue_families(queue_family_count);
301 vkGetPhysicalDeviceQueueFamilyProperties(device_candidate, &queue_family_count, queue_families.data());
302
303 for (uint32_t i = 0; i < queue_family_count; i++) {
304 if (queue_families[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
305 return i;
306 }
307 }
308
309 return UINT32_MAX;
310 }
311
312 void VulkanDevice::createLogicalDevice() {
313 compute_queue_family = findComputeQueueFamily(physical_device);
314
315 VkDeviceQueueCreateInfo queue_create_info{};
316 queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
317 queue_create_info.queueFamilyIndex = compute_queue_family;
318 queue_create_info.queueCount = 1;
319 float queue_priority = 1.0f;
320 queue_create_info.pQueuePriorities = &queue_priority;
321
322 VkPhysicalDeviceFeatures device_features{};
323 // No special features required for baseline compute
324
325 // Check and enable atomic float if available (needed for atomicAdd in shaders)
326 VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features{};
327 atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT;
328 atomic_float_features.pNext = nullptr;
329
330 VkPhysicalDeviceFeatures2 features2{};
331 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
332 features2.pNext = &atomic_float_features;
333 features2.features = device_features;
334
335 // Query if atomic float is supported
336 vkGetPhysicalDeviceFeatures2(physical_device, &features2);
337
338 bool enable_atomic_float = atomic_float_features.shaderBufferFloat32AtomicAdd;
339
340 VkDeviceCreateInfo create_info{};
341 create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
342 create_info.pQueueCreateInfos = &queue_create_info;
343 create_info.queueCreateInfoCount = 1;
344
345 // Required device extensions
346 std::vector<const char *> device_extensions;
347
348#ifdef __APPLE__
349 // MoltenVK requires portability subset extension
350 device_extensions.push_back("VK_KHR_portability_subset");
351#endif
352
353 // Enable atomic float extension if supported
354 if (enable_atomic_float) {
355 device_extensions.push_back(VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
356 // Enable the feature via pNext chain
357 atomic_float_features.shaderBufferFloat32AtomicAdd = VK_TRUE;
358 create_info.pNext = &features2;
359 create_info.pEnabledFeatures = nullptr; // Use features2 instead
360 } else {
361 create_info.pEnabledFeatures = &device_features;
362 }
363
364 create_info.enabledExtensionCount = static_cast<uint32_t>(device_extensions.size());
365 create_info.ppEnabledExtensionNames = device_extensions.data();
366
367 VkResult result = vkCreateDevice(physical_device, &create_info, nullptr, &device);
368 if (result != VK_SUCCESS) {
369 helios_runtime_error("ERROR (VulkanDevice::createLogicalDevice): Failed to create logical device. "
370 "VkResult code: " +
371 std::to_string(result));
372 }
373
374 vkGetDeviceQueue(device, compute_queue_family, 0, &compute_queue);
375 }
376
377 void VulkanDevice::createAllocator() {
378 VmaAllocatorCreateInfo allocator_info{};
379 allocator_info.vulkanApiVersion = VK_API_VERSION_1_1;
380 allocator_info.physicalDevice = physical_device;
381 allocator_info.device = device;
382 allocator_info.instance = instance;
383
384 VkResult result = vmaCreateAllocator(&allocator_info, &allocator);
385 if (result != VK_SUCCESS) {
386 helios_runtime_error("ERROR (VulkanDevice::createAllocator): Failed to create VMA allocator. "
387 "VkResult code: " +
388 std::to_string(result));
389 }
390 }
391
392 void VulkanDevice::detectMoltenVK() {
393 // Method 1: Check for Apple vendor ID (0x106B)
394 if (device_properties.vendorID == 0x106B) {
395 is_moltenvk = true;
396 return;
397 }
398
399 // Method 2: Check for MoltenVK-specific extension
400 uint32_t ext_count;
401 vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &ext_count, nullptr);
402 std::vector<VkExtensionProperties> exts(ext_count);
403 vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &ext_count, exts.data());
404
405 for (const auto &ext: exts) {
406 if (std::strcmp(ext.extensionName, "VK_MVK_moltenvk") == 0) {
407 is_moltenvk = true;
408 return;
409 }
410 }
411
412 // Method 3: Fallback to device name check
413 const char *device_name = device_properties.deviceName;
414 is_moltenvk = (std::strstr(device_name, "MoltenVK") != nullptr);
415 }
416
417 void VulkanDevice::detectAtomicFloat() {
418 // First check if VK_EXT_shader_atomic_float extension is available
419 uint32_t ext_count;
420 vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &ext_count, nullptr);
421 std::vector<VkExtensionProperties> exts(ext_count);
422 vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &ext_count, exts.data());
423
424 bool has_extension = false;
425 for (const auto &ext: exts) {
426 if (std::strcmp(ext.extensionName, VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME) == 0) {
427 has_extension = true;
428 break;
429 }
430 }
431
432 if (!has_extension) {
433 has_atomic_float = false;
434 return; // Extension not available, use atomicCompSwap fallback
435 }
436
437 // Extension present, query features
438 VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features{};
439 atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT;
440
441 VkPhysicalDeviceFeatures2 features2{};
442 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
443 features2.pNext = &atomic_float_features;
444
445 vkGetPhysicalDeviceFeatures2(physical_device, &features2);
446
447 has_atomic_float = atomic_float_features.shaderBufferFloat32AtomicAdd;
448 }
449
451 // Dispatch a compute shader that writes to a storage buffer, then read it back.
452 // This verifies the device can actually execute GPU compute work with memory access.
453 //
454 // A no-op shader (void main(){}) is NOT sufficient: MoltenVK on macOS CI runners
455 // (Apple Silicon VMs via Virtualization.framework) can dispatch trivial shaders
456 // that touch no memory, but fail with VK_ERROR_DEVICE_LOST when a shader reads
457 // or writes storage buffers — which is what the real radiation shaders do.
458 //
459 // This probe exercises the same code path: descriptor set + storage buffer + write.
460
461 // Embedded SPIR-V for:
462 // #version 450
463 // layout(local_size_x = 1) in;
464 // layout(set = 0, binding = 0) buffer ProbeBuffer { uint data[]; } buf;
465 // void main() { buf.data[0] = 42u; }
466 // Compiled with glslangValidator -V. 568 bytes, 142 words.
467 static const uint32_t probe_spirv[] = {
468 0x07230203, 0x00010000, 0x0008000b, 0x00000013,
469 0x00000000, 0x00020011, 0x00000001, 0x0006000b,
470 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e,
471 0x00000000, 0x0003000e, 0x00000000, 0x00000001,
472 0x0005000f, 0x00000005, 0x00000004, 0x6e69616d,
473 0x00000000, 0x00060010, 0x00000004, 0x00000011,
474 0x00000001, 0x00000001, 0x00000001, 0x00030003,
475 0x00000002, 0x000001c2, 0x00040005, 0x00000004,
476 0x6e69616d, 0x00000000, 0x00050005, 0x00000008,
477 0x626f7250, 0x66754265, 0x00726566, 0x00050006,
478 0x00000008, 0x00000000, 0x61746164, 0x00000000,
479 0x00030005, 0x0000000a, 0x00667562, 0x00040047,
480 0x00000007, 0x00000006, 0x00000004, 0x00030047,
481 0x00000008, 0x00000003, 0x00050048, 0x00000008,
482 0x00000000, 0x00000023, 0x00000000, 0x00040047,
483 0x0000000a, 0x00000021, 0x00000000, 0x00040047,
484 0x0000000a, 0x00000022, 0x00000000, 0x00040047,
485 0x00000012, 0x0000000b, 0x00000019, 0x00020013,
486 0x00000002, 0x00030021, 0x00000003, 0x00000002,
487 0x00040015, 0x00000006, 0x00000020, 0x00000000,
488 0x0003001d, 0x00000007, 0x00000006, 0x0003001e,
489 0x00000008, 0x00000007, 0x00040020, 0x00000009,
490 0x00000002, 0x00000008, 0x0004003b, 0x00000009,
491 0x0000000a, 0x00000002, 0x00040015, 0x0000000b,
492 0x00000020, 0x00000001, 0x0004002b, 0x0000000b,
493 0x0000000c, 0x00000000, 0x0004002b, 0x00000006,
494 0x0000000d, 0x0000002a, 0x00040020, 0x0000000e,
495 0x00000002, 0x00000006, 0x00040017, 0x00000010,
496 0x00000006, 0x00000003, 0x0004002b, 0x00000006,
497 0x00000011, 0x00000001, 0x0006002c, 0x00000010,
498 0x00000012, 0x00000011, 0x00000011, 0x00000011,
499 0x00050036, 0x00000002, 0x00000004, 0x00000000,
500 0x00000003, 0x000200f8, 0x00000005, 0x00060041,
501 0x0000000e, 0x0000000f, 0x0000000a, 0x0000000c,
502 0x0000000c, 0x0003003e, 0x0000000f, 0x0000000d,
503 0x000100fd, 0x00010038,
504 };
505 static const size_t probe_spirv_size = sizeof(probe_spirv);
506
507 // Vulkan handles for cleanup
508 VkCommandPool probe_pool = VK_NULL_HANDLE;
509 VkCommandBuffer probe_cmd = VK_NULL_HANDLE;
510 VkFence probe_fence = VK_NULL_HANDLE;
511 VkShaderModule probe_shader = VK_NULL_HANDLE;
512 VkPipelineLayout probe_layout = VK_NULL_HANDLE;
513 VkPipeline probe_pipeline = VK_NULL_HANDLE;
514 VkDescriptorSetLayout probe_set_layout = VK_NULL_HANDLE;
515 VkDescriptorPool probe_desc_pool = VK_NULL_HANDLE;
516 VkDescriptorSet probe_desc_set = VK_NULL_HANDLE;
517 VkBuffer probe_buffer = VK_NULL_HANDLE;
518 VmaAllocation probe_alloc = VK_NULL_HANDLE;
519
520 auto cleanup = [&]() {
521 if (probe_pipeline != VK_NULL_HANDLE) vkDestroyPipeline(device, probe_pipeline, nullptr);
522 if (probe_layout != VK_NULL_HANDLE) vkDestroyPipelineLayout(device, probe_layout, nullptr);
523 if (probe_shader != VK_NULL_HANDLE) vkDestroyShaderModule(device, probe_shader, nullptr);
524 if (probe_desc_pool != VK_NULL_HANDLE) vkDestroyDescriptorPool(device, probe_desc_pool, nullptr);
525 if (probe_set_layout != VK_NULL_HANDLE) vkDestroyDescriptorSetLayout(device, probe_set_layout, nullptr);
526 if (probe_buffer != VK_NULL_HANDLE) vmaDestroyBuffer(allocator, probe_buffer, probe_alloc);
527 if (probe_fence != VK_NULL_HANDLE) vkDestroyFence(device, probe_fence, nullptr);
528 if (probe_pool != VK_NULL_HANDLE) vkDestroyCommandPool(device, probe_pool, nullptr);
529 };
530
531 // 1. Create storage buffer (4 bytes, host-visible for readback)
532 VkBufferCreateInfo buf_info{};
533 buf_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
534 buf_info.size = sizeof(uint32_t);
535 buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
536 buf_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
537
538 VmaAllocationCreateInfo vma_info{};
539 vma_info.usage = VMA_MEMORY_USAGE_AUTO;
540 vma_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
541
542 VmaAllocationInfo alloc_result{};
543 VkResult result = vmaCreateBuffer(allocator, &buf_info, &vma_info, &probe_buffer, &probe_alloc, &alloc_result);
544 if (result != VK_SUCCESS) {
545 cleanup();
546 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create probe buffer. "
547 "VkResult code: " + std::to_string(result));
548 }
549
550 // Zero the buffer so we can verify the shader wrote to it
551 *static_cast<uint32_t *>(alloc_result.pMappedData) = 0;
552
553 // 2. Create descriptor set layout (one storage buffer at binding 0)
554 VkDescriptorSetLayoutBinding binding{};
555 binding.binding = 0;
556 binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
557 binding.descriptorCount = 1;
558 binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
559
560 VkDescriptorSetLayoutCreateInfo set_layout_info{};
561 set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
562 set_layout_info.bindingCount = 1;
563 set_layout_info.pBindings = &binding;
564
565 result = vkCreateDescriptorSetLayout(device, &set_layout_info, nullptr, &probe_set_layout);
566 if (result != VK_SUCCESS) {
567 cleanup();
568 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create descriptor set layout. "
569 "VkResult code: " + std::to_string(result));
570 }
571
572 // 3. Create descriptor pool and allocate set
573 VkDescriptorPoolSize pool_size{};
574 pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
575 pool_size.descriptorCount = 1;
576
577 VkDescriptorPoolCreateInfo desc_pool_info{};
578 desc_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
579 desc_pool_info.maxSets = 1;
580 desc_pool_info.poolSizeCount = 1;
581 desc_pool_info.pPoolSizes = &pool_size;
582
583 result = vkCreateDescriptorPool(device, &desc_pool_info, nullptr, &probe_desc_pool);
584 if (result != VK_SUCCESS) {
585 cleanup();
586 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create descriptor pool. "
587 "VkResult code: " + std::to_string(result));
588 }
589
590 VkDescriptorSetAllocateInfo desc_alloc_info{};
591 desc_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
592 desc_alloc_info.descriptorPool = probe_desc_pool;
593 desc_alloc_info.descriptorSetCount = 1;
594 desc_alloc_info.pSetLayouts = &probe_set_layout;
595
596 result = vkAllocateDescriptorSets(device, &desc_alloc_info, &probe_desc_set);
597 if (result != VK_SUCCESS) {
598 cleanup();
599 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to allocate descriptor set. "
600 "VkResult code: " + std::to_string(result));
601 }
602
603 // 4. Update descriptor set to point to the buffer
604 VkDescriptorBufferInfo desc_buf_info{};
605 desc_buf_info.buffer = probe_buffer;
606 desc_buf_info.offset = 0;
607 desc_buf_info.range = VK_WHOLE_SIZE;
608
609 VkWriteDescriptorSet write{};
610 write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
611 write.dstSet = probe_desc_set;
612 write.dstBinding = 0;
613 write.descriptorCount = 1;
614 write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
615 write.pBufferInfo = &desc_buf_info;
616
617 vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
618
619 // 5. Create shader module from embedded SPIR-V
620 VkShaderModuleCreateInfo shader_info{};
621 shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
622 shader_info.codeSize = probe_spirv_size;
623 shader_info.pCode = probe_spirv;
624
625 result = vkCreateShaderModule(device, &shader_info, nullptr, &probe_shader);
626 if (result != VK_SUCCESS) {
627 cleanup();
628 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create probe shader module. "
629 "VkResult code: " + std::to_string(result));
630 }
631
632 // 6. Create pipeline layout with descriptor set layout, and compute pipeline
633 VkPipelineLayoutCreateInfo layout_create_info{};
634 layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
635 layout_create_info.setLayoutCount = 1;
636 layout_create_info.pSetLayouts = &probe_set_layout;
637
638 result = vkCreatePipelineLayout(device, &layout_create_info, nullptr, &probe_layout);
639 if (result != VK_SUCCESS) {
640 cleanup();
641 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create probe pipeline layout. "
642 "VkResult code: " + std::to_string(result));
643 }
644
645 VkComputePipelineCreateInfo pipeline_info{};
646 pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
647 pipeline_info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
648 pipeline_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
649 pipeline_info.stage.module = probe_shader;
650 pipeline_info.stage.pName = "main";
651 pipeline_info.layout = probe_layout;
652
653 result = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &probe_pipeline);
654 if (result != VK_SUCCESS) {
655 cleanup();
656 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create probe compute pipeline. "
657 "VkResult code: " + std::to_string(result));
658 }
659
660 // 7. Create command pool and record dispatch with bound descriptor set
661 VkCommandPoolCreateInfo pool_create_info{};
662 pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
663 pool_create_info.queueFamilyIndex = compute_queue_family;
664 pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
665
666 result = vkCreateCommandPool(device, &pool_create_info, nullptr, &probe_pool);
667 if (result != VK_SUCCESS) {
668 cleanup();
669 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create command pool. "
670 "VkResult code: " + std::to_string(result));
671 }
672
673 VkCommandBufferAllocateInfo cmd_alloc_info{};
674 cmd_alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
675 cmd_alloc_info.commandPool = probe_pool;
676 cmd_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
677 cmd_alloc_info.commandBufferCount = 1;
678
679 result = vkAllocateCommandBuffers(device, &cmd_alloc_info, &probe_cmd);
680 if (result != VK_SUCCESS) {
681 cleanup();
682 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to allocate command buffer. "
683 "VkResult code: " + std::to_string(result));
684 }
685
686 VkCommandBufferBeginInfo begin_info{};
687 begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
688 begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
689
690 vkBeginCommandBuffer(probe_cmd, &begin_info);
691 vkCmdBindPipeline(probe_cmd, VK_PIPELINE_BIND_POINT_COMPUTE, probe_pipeline);
692 vkCmdBindDescriptorSets(probe_cmd, VK_PIPELINE_BIND_POINT_COMPUTE, probe_layout, 0, 1, &probe_desc_set, 0, nullptr);
693 vkCmdDispatch(probe_cmd, 1, 1, 1); // Single workgroup, writes buf.data[0] = 42
694 vkEndCommandBuffer(probe_cmd);
695
696 // 8. Create fence, submit, and wait
697 VkFenceCreateInfo fence_info{};
698 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
699
700 result = vkCreateFence(device, &fence_info, nullptr, &probe_fence);
701 if (result != VK_SUCCESS) {
702 cleanup();
703 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Failed to create fence. "
704 "VkResult code: " + std::to_string(result));
705 }
706
707 VkSubmitInfo submit_info{};
708 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
709 submit_info.commandBufferCount = 1;
710 submit_info.pCommandBuffers = &probe_cmd;
711
712 result = vkQueueSubmit(compute_queue, 1, &submit_info, probe_fence);
713 if (result != VK_SUCCESS) {
714 cleanup();
715 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): Compute dispatch submission failed. "
716 "VkResult code: " + std::to_string(result));
717 }
718
719 // Poll with timeout (MoltenVK doesn't always respect the timeout parameter)
720 bool completed = false;
721 for (int attempt = 0; attempt < 50; ++attempt) {
722 result = vkWaitForFences(device, 1, &probe_fence, VK_TRUE, 100000000ULL); // 100ms per poll
723 if (result == VK_SUCCESS) {
724 completed = true;
725 break;
726 } else if (result != VK_TIMEOUT) {
727 cleanup();
728 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): GPU compute probe failed. "
729 "The Vulkan device was created but cannot execute compute shaders "
730 "(common on CI runners or systems without full GPU compute support). "
731 "VkResult code: " + std::to_string(result));
732 }
733 }
734
735 if (!completed) {
736 cleanup();
737 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): GPU compute probe timed out after 5 seconds.");
738 }
739
740 // 9. Verify the shader actually wrote to the buffer
741 uint32_t readback = *static_cast<uint32_t *>(alloc_result.pMappedData);
742 cleanup();
743
744 if (readback != 42) {
745 helios_runtime_error("ERROR (VulkanDevice::probeComputeCapability): GPU compute probe produced wrong result "
746 "(expected 42, got " + std::to_string(readback) + "). "
747 "The GPU may not support compute shader storage buffer writes.");
748 }
749 }
750
752 // Device info is not printed during normal operation
753 }
754
755} // namespace helios