0.1.8
Loading...
Searching...
No Matches
gpu_detector.py
Go to the documentation of this file.
1"""
2Runtime GPU capability detection for PyHelios.
3
4This module provides runtime detection of GPU hardware and driver availability,
5which is essential for determining whether GPU-accelerated tests should run.
6"""
7
import logging
import platform
import subprocess
from typing import Any, Dict, Optional
12
# Module-level logger named after this module; the detection functions in
# this file report their diagnostics through it at DEBUG level.
logger = logging.getLogger(__name__)
14
15
def is_gpu_runtime_available() -> bool:
    """
    Check if GPU hardware and drivers are actually available at runtime.

    This is different from build-time CUDA availability - this checks if
    the GPU can actually be used for computation at runtime.

    Returns:
        bool: True if get_gpu_runtime_info() reports either a CUDA runtime
        or an OpenCL runtime as available, False otherwise (including when
        the detection itself raises).
    """
    try:
        # The runtime info must be fetched here; without this assignment the
        # lookup below raises NameError, which the handler would silently
        # turn into a permanent "no GPU" answer.
        info = get_gpu_runtime_info()
        return bool(
            info.get('cuda_runtime_available', False)
            or info.get('opencl_available', False)
        )
    except Exception as e:
        # Detection is best-effort: any unexpected failure means "no GPU".
        logger.debug(f"GPU runtime check failed: {e}")
        return False
32
33
def get_gpu_runtime_info() -> Dict[str, Any]:
    """
    Get detailed information about GPU runtime capabilities.

    Both probes are best-effort: an exception raised by either one is logged
    at DEBUG level and recorded in ``error_message`` instead of propagating.

    Returns:
        Dict containing GPU runtime information:
        - cuda_runtime_available: bool
        - cuda_device_count: int
        - cuda_version: str or None
        - opencl_available: bool
        - platform: str (value of platform.system())
        - error_message: str describing the failed probe(s), "; "-separated
          when both fail, or None when no probe raised
    """
    info: Dict[str, Any] = {
        'cuda_runtime_available': False,
        'cuda_device_count': 0,
        'cuda_version': None,
        'opencl_available': False,
        'platform': platform.system(),
        'error_message': None,
    }
    errors = []

    # Check CUDA runtime capability
    try:
        info.update(_check_cuda_runtime())
    except Exception as e:
        errors.append(f"CUDA check failed: {e}")
        logger.debug(f"CUDA runtime check failed: {e}")

    # Check OpenCL capability (fallback for some systems)
    try:
        info['opencl_available'] = _check_opencl_runtime()
    except Exception as e:
        # Record the failure too, so callers can see why OpenCL is reported
        # as unavailable rather than only finding it in the debug log.
        errors.append(f"OpenCL check failed: {e}")
        logger.debug(f"OpenCL runtime check failed: {e}")

    if errors:
        info['error_message'] = "; ".join(errors)

    return info
70
71
def _check_cuda_runtime() -> Dict[str, Any]:
    """
    Check CUDA runtime availability by trying several detection methods.

    The probes run in order and the first one that finds at least one
    device wins:
      1. the ``nvidia-smi`` command-line tool,
      2. the optional ``pycuda`` package,
      3. the optional ``pynvml`` package,
      4. NVIDIA GPU device nodes under ``/dev`` (Linux only).

    Returns:
        Dict with keys ``cuda_runtime_available`` (bool),
        ``cuda_device_count`` (int) and ``cuda_version`` (str or None).
        For the nvidia-smi and pynvml probes ``cuda_version`` holds the
        driver version string those tools report.
    """
    probes = [_probe_nvidia_smi, _probe_pycuda, _probe_pynvml]
    if platform.system() == "Linux":
        probes.append(_probe_nvidia_device_files)

    for probe in probes:
        detected = probe()
        if detected is not None:
            return detected

    logger.debug("No CUDA runtime capability detected")
    return _cuda_result(0)


def _cuda_result(device_count: int, version: Optional[str] = None) -> Dict[str, Any]:
    """Build the CUDA info dict for a given device count and version string."""
    return {
        'cuda_runtime_available': device_count > 0,
        'cuda_device_count': device_count,
        'cuda_version': version,
    }


def _probe_nvidia_smi() -> Optional[Dict[str, Any]]:
    """Detect GPUs with the nvidia-smi CLI; return None if none are found."""
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable binary; SubprocessError
        # covers the timeout. Either way, fall through to the next probe.
        logger.debug("nvidia-smi not available")
        return None

    if result.returncode != 0:
        return None

    # One output line per GPU. Count only non-blank lines: splitting an
    # empty string would otherwise yield [''] and report one phantom GPU.
    device_count = sum(1 for line in result.stdout.splitlines() if line.strip())
    if device_count == 0:
        return None
    logger.debug(f"nvidia-smi detected {device_count} GPU(s)")

    # Version lookup is best-effort; failure must not discard the detection.
    version = None
    try:
        version_result = subprocess.run(
            ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=5
        )
        if version_result.returncode == 0:
            version_lines = version_result.stdout.strip().splitlines()
            if version_lines:
                version = version_lines[0].strip() or None
    except Exception as e:
        logger.debug(f"nvidia-smi version query failed: {e}")

    return _cuda_result(device_count, version)


def _probe_pycuda() -> Optional[Dict[str, Any]]:
    """Detect GPUs with the optional pycuda package; return None if none are found."""
    try:
        import pycuda.driver as cuda
        # Imported only for its import-time side effect.
        # NOTE(review): presumably this confirms a device can really be
        # used, not merely enumerated - confirm before removing.
        import pycuda.autoinit  # noqa: F401

        cuda.init()
        device_count = cuda.Device.count()
    except ImportError:
        logger.debug("PyCUDA not available")
        return None
    except Exception as e:
        logger.debug(f"PyCUDA initialization failed: {e}")
        return None

    if device_count <= 0:
        return None

    version = None
    try:
        version = ".".join(map(str, cuda.get_version()))
    except Exception as e:
        logger.debug(f"PyCUDA version query failed: {e}")

    logger.debug(f"PyCUDA detected {device_count} GPU(s)")
    return _cuda_result(device_count, version)


def _probe_pynvml() -> Optional[Dict[str, Any]]:
    """Detect GPUs with the optional pynvml package; return None if none are found."""
    try:
        import pynvml
        pynvml.nvmlInit()
    except ImportError:
        logger.debug("pynvml not available")
        return None
    except Exception as e:
        logger.debug(f"pynvml initialization failed: {e}")
        return None

    try:
        device_count = pynvml.nvmlDeviceGetCount()
        if device_count <= 0:
            return None

        version = None
        try:
            raw_version = pynvml.nvmlSystemGetDriverVersion()
            # Accept both bytes and str so the version is not silently lost
            # when the binding already returns text.
            if isinstance(raw_version, bytes):
                version = raw_version.decode('utf-8')
            else:
                version = str(raw_version)
        except Exception as e:
            logger.debug(f"pynvml version query failed: {e}")

        logger.debug(f"pynvml detected {device_count} GPU(s)")
        return _cuda_result(device_count, version)
    except Exception as e:
        logger.debug(f"pynvml initialization failed: {e}")
        return None
    finally:
        # Pair the successful nvmlInit() with a shutdown so the probe does
        # not leave NVML initialised.
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug(f"pynvml shutdown failed: {e}")


def _probe_nvidia_device_files() -> Optional[Dict[str, Any]]:
    """Count /dev/nvidia<N> device nodes; return None if none are found."""
    import os

    try:
        dev_entries = os.listdir("/dev")
    except OSError as e:
        logger.debug(f"Could not list /dev: {e}")
        return None

    prefix = "nvidia"
    # Count only nodes named nvidia<digits>. Control and helper nodes such as
    # nvidiactl, nvidia-uvm or nvidia-modeset would inflate the device count.
    gpu_nodes = [
        name for name in dev_entries
        if name.startswith(prefix) and name[len(prefix):].isdigit()
    ]
    if not gpu_nodes:
        return None

    logger.debug(f"Found {len(gpu_nodes)} NVIDIA device files in /dev")
    return _cuda_result(len(gpu_nodes))
180
181
def _check_opencl_runtime() -> bool:
    """
    Check OpenCL runtime availability.

    Uses the optional ``pyopencl`` package to enumerate OpenCL platforms and
    reports success as soon as one platform exposes at least one device.
    A failure while querying one platform is logged and that platform is
    skipped, so it cannot hide devices on the remaining platforms.

    Returns:
        bool: True if OpenCL is available, False otherwise
    """
    try:
        import pyopencl as cl
        cl_platforms = cl.get_platforms()
    except ImportError:
        logger.debug("PyOpenCL not available")
        return False
    except Exception as e:
        logger.debug(f"OpenCL check failed: {e}")
        return False

    # The loop variable is deliberately not called "platform": that name
    # belongs to the standard-library module imported at the top of the file.
    for cl_platform in cl_platforms:
        try:
            devices = cl_platform.get_devices()
            if devices:
                logger.debug(f"OpenCL available with {len(devices)} devices on platform {cl_platform.name}")
                return True
        except Exception as e:
            logger.debug(f"OpenCL check failed: {e}")

    return False
Dict[str, any] _check_cuda_runtime()
Check CUDA runtime availability by trying to initialize CUDA.
bool _check_opencl_runtime()
Check OpenCL runtime availability.
bool is_gpu_runtime_available()
Check if GPU hardware and drivers are actually available at runtime.
Dict[str, any] get_gpu_runtime_info()
Get detailed information about GPU runtime capabilities.