1.3.72
 
Loading...
Searching...
No Matches
OptiX8DeviceCode.cu
Go to the documentation of this file.
1
16#include <optix.h>
17#include <optix_device.h>
18#include "OptiX8Math.h"
19#include "OptiX8LaunchParams.h"
20
21// ---------------------------------------------------------------------------
22// Launch params declared as constant memory (filled by optixLaunch)
23// ---------------------------------------------------------------------------
24extern "C" __constant__ OptiX8LaunchParams params;
25
26// ---------------------------------------------------------------------------
27// Device utility functions (adapted from RayTracing.cuh, OptiX-API-agnostic)
28// ---------------------------------------------------------------------------
29
30static __forceinline__ __device__ unsigned int lcg(unsigned int &prev) {
31 const unsigned int LCG_A = 1664525u;
32 const unsigned int LCG_C = 1013904223u;
33 prev = (LCG_A * prev + LCG_C);
34 return prev & 0x00FFFFFF;
35}
36
37static __forceinline__ __device__ float rnd(unsigned int &prev) {
38 return (float)lcg(prev) / (float)0x01000000;
39}
40
41template<unsigned int N>
42static __forceinline__ __device__ unsigned int tea(unsigned int val0, unsigned int val1) {
43 unsigned int v0 = val0, v1 = val1, s0 = 0;
44 for (unsigned int n = 0; n < N; n++) {
45 s0 += 0x9e3779b9;
46 v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + s0) ^ ((v1 >> 5) + 0xc8013ea4);
47 v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + s0) ^ ((v0 >> 5) + 0x7e95761e);
48 }
49 return v0;
50}
51
52__device__ __forceinline__ void atomicFloatAdd(float *address, float val) {
53 atomicAdd(address, val);
54}
55
56__device__ __forceinline__ void d_transformPoint(const float (&T)[16], float3 &v) {
57 float3 V;
58 V.x = T[0]*v.x + T[1]*v.y + T[2]*v.z + T[3];
59 V.y = T[4]*v.x + T[5]*v.y + T[6]*v.z + T[7];
60 V.z = T[8]*v.x + T[9]*v.y + T[10]*v.z + T[11];
61 v = V;
62}
63
64__device__ __forceinline__ float3 d_rotatePoint(const float3 &pos, float theta, float phi) {
65 float st = sinf(theta), ct = cosf(theta);
66 float sp = sinf(phi), cp = cosf(phi);
67 float3 tmp;
68 tmp.x = cp*ct*pos.x + (-sp)*pos.y + cp*st*pos.z;
69 tmp.y = sp*ct*pos.x + cp*pos.y + sp*st*pos.z;
70 tmp.z = -st*pos.x + ct*pos.z;
71 return tmp;
72}
73
74__device__ __forceinline__ float d_magnitude(const float3 v) {
75 return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);
76}
77
78static __forceinline__ __device__ float acos_safe(float x) {
79 return acosf(fmaxf(-1.f, fminf(1.f, x)));
80}
81
82static __forceinline__ __device__ float asin_safe(float x) {
83 return asinf(fmaxf(-1.f, fminf(1.f, x)));
84}
85
86// Evaluates the diffuse angular distribution (fd factor).
87// Priority 1: Power-law (Harrison & Coombes) if extinction > 0
88// Priority 2: Prague sky model if sky_params.w > 0
89// Priority 3: Isotropic (returns 1.0)
90static __device__ float evaluateDiffuseAngularDistribution(const float3 &ray_dir, const float3 &peak_dir,
91 float power_law_K, float power_law_norm,
92 const float4 &sky_params) {
93 if (power_law_K > 0.f) {
94 float psi = acos_safe(dot(peak_dir, ray_dir));
95 psi = fmaxf(psi, M_PI / 180.f);
96 return powf(psi, -power_law_K) * power_law_norm;
97 }
98 if (sky_params.w > 0.f) {
99 float gamma = acos_safe(dot(ray_dir, peak_dir)) * 180.f / M_PI;
100 float cos_theta = fmaxf(ray_dir.z, 0.f);
101 float pattern = (1.f + sky_params.x * expf(-gamma / sky_params.y))
102 * (1.f + (sky_params.z - 1.f) * (1.f - cos_theta));
103 return pattern * sky_params.w * M_PI;
104 }
105 return 1.f; // isotropic
106}
107
108// Load transform matrix for primitive at global position pos
109__device__ __forceinline__ void loadTransformMatrix(uint32_t pos, float (&T)[16]) {
110 for (int i = 0; i < 16; i++) {
111 T[i] = params.transform_matrix[pos * 16 + i];
112 }
113}
114
115// Sample texture mask at UV (uv_u, uv_v) in [0,1]^2 for mask at index msk_id.
116// Returns true if the texel is opaque (intersection should be reported),
117// false if transparent (reject the hit).
118// Standard texture convention: uv_v=0 maps to the top row (iy=0).
119static __forceinline__ __device__ bool sampleMask(int32_t msk_id, float uv_u, float uv_v) {
120 if (msk_id < 0) return true; // no mask → always opaque
121 const int32_t width = params.mask_sizes[msk_id * 2];
122 const int32_t height = params.mask_sizes[msk_id * 2 + 1];
123 const uint32_t offset = params.mask_offsets[msk_id];
124 int ix = (int)(floorf(float(width - 1) * uv_u));
125 int iy = (int)(floorf(float(height - 1) * (1.f - uv_v)));
126 ix = max(0, min(ix, width - 1));
127 iy = max(0, min(iy, height - 1));
128 return params.mask_data[offset + (uint32_t)(iy * width + ix)] != 0u;
129}
130
131// Build a rotation-only 4×4 transform matrix (row-major) from Euler angles (rx, ry, rz)
132// Matches OptiX 6's d_makeTransformMatrix convention.
133static __forceinline__ __device__ void d_makeTransformMatrix(float3 rotation, float (&T)[16]) {
134 float sx = sinf(rotation.x), cx = cosf(rotation.x);
135 float sy = sinf(rotation.y), cy = cosf(rotation.y);
136 float sz = sinf(rotation.z), cz = cosf(rotation.z);
137 T[0] = cz * cy; T[1] = cz * sy * sx - sz * cx; T[2] = cz * sy * cx + sz * sx; T[3] = 0.f;
138 T[4] = sz * cy; T[5] = sz * sy * sx + cz * cx; T[6] = sz * sy * cx - cz * sx; T[7] = 0.f;
139 T[8] = -sy; T[9] = cy * sx; T[10] = cy * cx; T[11] = 0.f;
140 T[12] = 0.f; T[13] = 0.f; T[14] = 0.f; T[15] = 1.f;
141}
142
143// Sample a point uniformly on the unit disk in the xy-plane (z=0).
144// Based on Suffern (2007) "Ray Tracing from the Ground Up", Ch. 6 concentric mapping.
145static __forceinline__ __device__ void d_sampleDisk(uint32_t &seed, float3 &sample) {
146 float Rx = rnd(seed), Ry = rnd(seed);
147 float sp_x = -1.f + 2.f * Rx;
148 float sp_y = -1.f + 2.f * Ry;
149 float r, p;
150 if (sp_x > -sp_y) {
151 if (sp_x > sp_y) { r = sp_x; p = sp_y / sp_x; }
152 else { r = sp_y; p = 2.f - sp_x / sp_y; }
153 } else {
154 if (sp_x < sp_y) { r = -sp_x; p = 4.f + sp_y / sp_x; }
155 else { r = -sp_y; p = (sp_y != 0.f) ? 6.f - sp_x / sp_y : 0.f; }
156 }
157 p *= 0.25f * M_PI;
158 sample = make_float3(r * cosf(p), r * sinf(p), 0.f);
159}
160
161// Sample a point uniformly on the unit square [-0.5,0.5]^2 in the xy-plane (z=0).
162static __forceinline__ __device__ void d_sampleSquare(uint32_t &seed, float3 &sample) {
163 sample = make_float3(-0.5f + rnd(seed), -0.5f + rnd(seed), 0.f);
164}
165
166// Invert a 4×4 row-major matrix (used for rect/disk source intersection tests).
167static __forceinline__ __device__ void d_invertMatrix(const float (&m)[16], float (&minv)[16]) {
168 float inv[16];
169 inv[0] = m[5]*m[10]*m[15] - m[5]*m[11]*m[14] - m[9]*m[6]*m[15] + m[9]*m[7]*m[14] + m[13]*m[6]*m[11] - m[13]*m[7]*m[10];
170 inv[4] = -m[4]*m[10]*m[15] + m[4]*m[11]*m[14] + m[8]*m[6]*m[15] - m[8]*m[7]*m[14] - m[12]*m[6]*m[11] + m[12]*m[7]*m[10];
171 inv[8] = m[4]*m[9]*m[15] - m[4]*m[11]*m[13] - m[8]*m[5]*m[15] + m[8]*m[7]*m[13] + m[12]*m[5]*m[11] - m[12]*m[7]*m[9];
172 inv[12] = -m[4]*m[9]*m[14] + m[4]*m[10]*m[13] + m[8]*m[5]*m[14] - m[8]*m[6]*m[13] - m[12]*m[5]*m[10] + m[12]*m[6]*m[9];
173 inv[1] = -m[1]*m[10]*m[15] + m[1]*m[11]*m[14] + m[9]*m[2]*m[15] - m[9]*m[3]*m[14] - m[13]*m[2]*m[11] + m[13]*m[3]*m[10];
174 inv[5] = m[0]*m[10]*m[15] - m[0]*m[11]*m[14] - m[8]*m[2]*m[15] + m[8]*m[3]*m[14] + m[12]*m[2]*m[11] - m[12]*m[3]*m[10];
175 inv[9] = -m[0]*m[9]*m[15] + m[0]*m[11]*m[13] + m[8]*m[1]*m[15] - m[8]*m[3]*m[13] - m[12]*m[1]*m[11] + m[12]*m[3]*m[9];
176 inv[13] = m[0]*m[9]*m[14] - m[0]*m[10]*m[13] - m[8]*m[1]*m[14] + m[8]*m[2]*m[13] + m[12]*m[1]*m[10] - m[12]*m[2]*m[9];
177 inv[2] = m[1]*m[6]*m[15] - m[1]*m[7]*m[14] - m[5]*m[2]*m[15] + m[5]*m[3]*m[14] + m[13]*m[2]*m[7] - m[13]*m[3]*m[6];
178 inv[6] = -m[0]*m[6]*m[15] + m[0]*m[7]*m[14] + m[4]*m[2]*m[15] - m[4]*m[3]*m[14] - m[12]*m[2]*m[7] + m[12]*m[3]*m[6];
179 inv[10] = m[0]*m[5]*m[15] - m[0]*m[7]*m[13] - m[4]*m[1]*m[15] + m[4]*m[3]*m[13] + m[12]*m[1]*m[7] - m[12]*m[3]*m[5];
180 inv[14] = -m[0]*m[5]*m[14] + m[0]*m[6]*m[13] + m[4]*m[1]*m[14] - m[4]*m[2]*m[13] - m[12]*m[1]*m[6] + m[12]*m[2]*m[5];
181 inv[3] = -m[1]*m[6]*m[11] + m[1]*m[7]*m[10] + m[5]*m[2]*m[11] - m[5]*m[3]*m[10] - m[9]*m[2]*m[7] + m[9]*m[3]*m[6];
182 inv[7] = m[0]*m[6]*m[11] - m[0]*m[7]*m[10] - m[4]*m[2]*m[11] + m[4]*m[3]*m[10] + m[8]*m[2]*m[7] - m[8]*m[3]*m[6];
183 inv[11] = -m[0]*m[5]*m[11] + m[0]*m[7]*m[9] + m[4]*m[1]*m[11] - m[4]*m[3]*m[9] - m[8]*m[1]*m[7] + m[8]*m[3]*m[5];
184 inv[15] = m[0]*m[5]*m[10] - m[0]*m[6]*m[9] - m[4]*m[1]*m[10] + m[4]*m[2]*m[9] + m[8]*m[1]*m[6] - m[8]*m[2]*m[5];
185 float det = m[0]*inv[0] + m[1]*inv[4] + m[2]*inv[8] + m[3]*inv[12];
186 det = 1.0f / det;
187 for (int i = 0; i < 16; i++) minv[i] = inv[i] * det;
188}
189
190// Test if ray hits a sphere source (any intersection in front of origin)
191static __forceinline__ __device__ bool d_raySphereIntersect(const float3 &ray_origin, const float3 &ray_direction,
192 const float3 &sphere_center, float sphere_radius) {
193 const float3 oc = make_float3(ray_origin.x - sphere_center.x, ray_origin.y - sphere_center.y,
194 ray_origin.z - sphere_center.z);
195 const float b = dot(oc, ray_direction);
196 const float c = dot(oc, oc) - sphere_radius * sphere_radius;
197 const float disc = b * b - c;
198 if (disc < 0.0f) return false;
199 return (-b - sqrtf(disc)) > 0.0f;
200}
201
202// Test if ray hits the front face of a rectangular source
203static __forceinline__ __device__ bool d_rayRectangleIntersect(const float3 &ray_origin, const float3 &ray_direction,
204 const float3 &rect_center, float rect_width, float rect_length,
205 const float3 &rect_rotation, float &out_cos_angle) {
206 float transform[16];
207 d_makeTransformMatrix(rect_rotation, transform);
208 const float3 normal = make_float3(transform[2], transform[6], transform[10]);
209 const float denom = dot(ray_direction, normal);
210 if (denom >= -1e-6f) return false;
211 const float3 oc = make_float3(rect_center.x - ray_origin.x, rect_center.y - ray_origin.y,
212 rect_center.z - ray_origin.z);
213 const float t = dot(oc, normal) / denom;
214 if (t <= 0.0f) return false;
215 float3 hit = make_float3(ray_origin.x + t * ray_direction.x - rect_center.x,
216 ray_origin.y + t * ray_direction.y - rect_center.y,
217 ray_origin.z + t * ray_direction.z - rect_center.z);
218 float inv_t[16];
219 d_invertMatrix(transform, inv_t);
220 d_transformPoint(inv_t, hit);
221 if (fabsf(hit.x) > rect_width * 0.5f || fabsf(hit.y) > rect_length * 0.5f) return false;
222 out_cos_angle = -denom;
223 return true;
224}
225
226// Test if ray hits the front face of a disk source
227static __forceinline__ __device__ bool d_rayDiskIntersect(const float3 &ray_origin, const float3 &ray_direction,
228 const float3 &disk_center, float disk_radius,
229 const float3 &disk_rotation, float &out_cos_angle) {
230 float transform[16];
231 d_makeTransformMatrix(disk_rotation, transform);
232 const float3 normal = make_float3(transform[2], transform[6], transform[10]);
233 const float denom = dot(ray_direction, normal);
234 if (denom >= -1e-6f) return false;
235 const float3 oc = make_float3(disk_center.x - ray_origin.x, disk_center.y - ray_origin.y,
236 disk_center.z - ray_origin.z);
237 const float t = dot(oc, normal) / denom;
238 if (t <= 0.0f) return false;
239 const float3 hit = make_float3(ray_origin.x + t * ray_direction.x - disk_center.x,
240 ray_origin.y + t * ray_direction.y - disk_center.y,
241 ray_origin.z + t * ray_direction.z - disk_center.z);
242 if (dot(hit, hit) > disk_radius * disk_radius) return false;
243 out_cos_angle = -denom;
244 return true;
245}
246
247// ---------------------------------------------------------------------------
248// PerRayData accessor (uses getPRD() from OptiX8LaunchParams.h)
249// ---------------------------------------------------------------------------
250
251// getPRD() is defined in OptiX8LaunchParams.h (guarded by #ifdef __CUDACC__)
252
253// ---------------------------------------------------------------------------
254// Intersection dispatch: handles all primitive types via primitive_type lookup.
255// All hitgroup SBT records reference this single entry point.
256// prim_idx = optixGetPrimitiveIndex() is the global AABB index (= global pos).
257// UUID is read from params.primitive_uuid[prim_idx] (global pos → UUID array).
258// Triangle and patch vertices are derived from the transform matrix using
259// canonical-space vertices that match those used in buildAABBs() on the host.
260// ---------------------------------------------------------------------------
261
262extern "C" __global__ void __intersection__patch() {
263 const uint32_t prim_idx = optixGetPrimitiveIndex();
264 const uint32_t pos = prim_idx; // global AABB index == global primitive position
265 const uint32_t ptype = params.primitive_type[pos];
266
267 const float3 ray_origin = optixGetWorldRayOrigin();
268 const float3 ray_direction = optixGetWorldRayDirection();
269 const float t_min = optixGetRayTmin();
270 const float t_max = optixGetRayTmax();
271
272 const uint32_t uuid = params.primitive_uuid[prim_idx];
273
274 if (ptype == 5) {
275 // ---- Bbox face: planar quad intersection (periodic boundary wall) ----
276 // Each bbox has 4 world-space vertices stored at bbox_vertices[bbox_local * 4 + v].
277 const uint32_t bbox_local = uuid - params.bbox_UUID_base;
278 const float3 v0 = params.bbox_vertices[bbox_local * 4 + 0];
279 const float3 v1 = params.bbox_vertices[bbox_local * 4 + 1];
280 const float3 v2 = params.bbox_vertices[bbox_local * 4 + 2];
281 const float3 v3 = params.bbox_vertices[bbox_local * 4 + 3];
282
283 // Quad normal from two edges
284 float3 e1 = make_float3(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z);
285 float3 e3 = make_float3(v3.x - v0.x, v3.y - v0.y, v3.z - v0.z);
286 float3 n = cross(e1, e3);
287 float nd = dot(ray_direction, n);
288 if (fabsf(nd) < 1e-8f) return; // ray parallel to face
289
290 float t = dot(make_float3(v0.x - ray_origin.x, v0.y - ray_origin.y, v0.z - ray_origin.z), n) / nd;
291 if (t < t_min || t > t_max) return;
292
293 // Hit point must lie within the bounding box of the quad vertices
294 float3 hit = make_float3(ray_origin.x + t * ray_direction.x,
295 ray_origin.y + t * ray_direction.y,
296 ray_origin.z + t * ray_direction.z);
297 const float slack = 1e-4f;
298 float mnx = fminf(fminf(v0.x, v1.x), fminf(v2.x, v3.x)) - slack;
299 float mxx = fmaxf(fmaxf(v0.x, v1.x), fmaxf(v2.x, v3.x)) + slack;
300 float mny = fminf(fminf(v0.y, v1.y), fminf(v2.y, v3.y)) - slack;
301 float mxy = fmaxf(fmaxf(v0.y, v1.y), fmaxf(v2.y, v3.y)) + slack;
302 float mnz = fminf(fminf(v0.z, v1.z), fminf(v2.z, v3.z)) - slack;
303 float mxz = fmaxf(fmaxf(v0.z, v1.z), fmaxf(v2.z, v3.z)) + slack;
304 if (hit.x < mnx || hit.x > mxx || hit.y < mny || hit.y > mxy ||
305 hit.z < mnz || hit.z > mxz) return;
306
307 optixReportIntersection(t, 0, uuid, 0u);
308 return;
309 }
310
311 if (ptype != 0 && ptype != 1 && ptype != 3) return; // only patch, triangle, tile
312
313 float T[16];
314 loadTransformMatrix(pos, T);
315
316 if (ptype == 0 || ptype == 3) {
317 // ---- Patch: rectangle in canonical [-0.5, 0.5]^2 space ----
318 // Normal = third column of rotation part of T
319 float3 normal = make_float3(T[2], T[6], T[10]);
320 float nd = dot(ray_direction, normal);
321 if (fabsf(nd) < 1e-8f) return; // ray parallel to patch plane
322
323 // Patch centroid is the translation column of T
324 float3 patch_origin = make_float3(T[3], T[7], T[11]);
325 float t = dot(patch_origin - ray_origin, normal) / nd;
326 if (t < t_min || t > t_max) return;
327
328 // Project hit point into patch-local 2D coordinates
329 float3 hit_local = ray_origin + t * ray_direction - patch_origin;
330 float3 local_x = make_float3(T[0], T[4], T[8]);
331 float3 local_y = make_float3(T[1], T[5], T[9]);
332 float lx2 = dot(local_x, local_x);
333 float ly2 = dot(local_y, local_y);
334 if (lx2 < 1e-12f || ly2 < 1e-12f) return;
335 float u = dot(hit_local, local_x) / lx2;
336 float v = dot(hit_local, local_y) / ly2;
337 if (u < -0.5f || u > 0.5f || v < -0.5f || v > 0.5f) return;
338
339 // Texture mask check
340 const int32_t msk_id = params.mask_IDs[pos];
341 if (msk_id >= 0) {
342 float uv_u, uv_v;
343 if (params.uv_IDs[pos] >= 0) {
344 // Custom UV: bilinear from stored corner UVs
345 float2 uv0 = params.uv_data[pos * 4 + 0]; // UV at (u=0, v=0) corner
346 float2 uv1 = params.uv_data[pos * 4 + 1]; // UV at (u=1, v=0) corner
347 float2 uv2 = params.uv_data[pos * 4 + 2]; // UV at (u=0, v=1) corner
348 float du = uv1.x - uv0.x;
349 float dv = uv2.y - uv0.y;
350 uv_u = uv0.x + (u + 0.5f) * du;
351 uv_v = uv0.y + (v + 0.5f) * dv;
352 } else {
353 // Parametric UV: remap from [-0.5, 0.5] to [0, 1]
354 uv_u = u + 0.5f;
355 uv_v = v + 0.5f;
356 }
357 if (!sampleMask(msk_id, uv_u, uv_v)) return;
358 }
359
360 uint32_t face_attr = (nd < 0.f) ? 1u : 0u;
361 if (!face_attr && !params.twosided_flag[pos]) return;
362 optixReportIntersection(t, 0, uuid, face_attr);
363
364 } else {
365 // ---- Triangle: canonical vertices (0,0,0), (0,1,0), (1,1,0) ----
366 // World-space vertices via T (consistent with buildAABBs canonical vertices)
367 const float3 v0 = make_float3(T[3], T[7], T[11]);
368 const float3 v1 = make_float3(T[1] + T[3], T[5] + T[7], T[9] + T[11]);
369 const float3 v2 = make_float3(T[0] + T[1] + T[3], T[4] + T[5] + T[7], T[8] + T[9] + T[11]);
370
371 // Shirley's ray-triangle intersection (Möller–Trumbore style)
372 float a = v0.x - v1.x, b = v0.x - v2.x, c = ray_direction.x, d = v0.x - ray_origin.x;
373 float e = v0.y - v1.y, f = v0.y - v2.y, g = ray_direction.y, h = v0.y - ray_origin.y;
374 float i = v0.z - v1.z, j = v0.z - v2.z, k = ray_direction.z, l = v0.z - ray_origin.z;
375
376 float m = f * k - g * j, n = h * k - g * l, p = f * l - h * j;
377 float q = g * i - e * k, s = e * j - f * i;
378
379 float tri_denom = a * m + b * q + c * s;
380 if (fabsf(tri_denom) < 1e-12f) return;
381 float inv_denom = 1.f / tri_denom;
382
383 float e1 = d * m - b * n - c * p;
384 float beta = e1 * inv_denom;
385 if (beta < 0.f) return;
386
387 float r = e * l - h * i;
388 float e2 = a * n + d * q + c * r;
389 float gamma = e2 * inv_denom;
390 if (gamma < 0.f || beta + gamma > 1.f) return;
391
392 float e3 = a * p - b * r + d * s;
393 float t = e3 * inv_denom;
394 if (t < t_min || t > t_max) return;
395
396 // Texture mask check (using barycentric UV)
397 const int32_t msk_id = params.mask_IDs[pos];
398 if (msk_id >= 0) {
399 float uv_u, uv_v;
400 if (params.uv_IDs[pos] >= 0) {
401 // Custom UV: interpolate from stored per-vertex UV
402 float2 uv0 = params.uv_data[pos * 4 + 0];
403 float2 uv1 = params.uv_data[pos * 4 + 1];
404 float2 uv2 = params.uv_data[pos * 4 + 2];
405 // beta = weight at v1, gamma = weight at v2, (1-beta-gamma) = weight at v0
406 float2 uv = make_float2(uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
407 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
408 uv_u = uv.x;
409 uv_v = 1.f - uv.y; // Y-flip to match OptiX 6 convention
410 } else {
411 // Parametric UV: use barycentric coordinates directly
412 uv_u = beta + gamma; // along u-axis (v1 is at u=1)
413 uv_v = gamma; // along v-axis (v2 is at v=1)
414 }
415 if (!sampleMask(msk_id, uv_u, uv_v)) return;
416 }
417
418 // Face from cross product normal vs ray direction
419 float3 edge0 = make_float3(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z);
420 float3 edge1 = make_float3(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z);
421 float3 tri_nrm = make_float3(edge0.y * edge1.z - edge0.z * edge1.y,
422 edge0.z * edge1.x - edge0.x * edge1.z,
423 edge0.x * edge1.y - edge0.y * edge1.x);
424 uint32_t face_attr = (dot(ray_direction, tri_nrm) < 0.f) ? 1u : 0u;
425 if (!face_attr && !params.twosided_flag[pos]) return;
426 optixReportIntersection(t, 0, uuid, face_attr);
427 }
428}
429
430extern "C" __global__ void __intersection__disk() {
431 // Disk intersection is not yet implemented. Disk primitives are rejected at
432 // the host level in updateGeometry(), so this program is never invoked.
433}
434
435extern "C" __global__ void __intersection__tile() {
436 // Tile intersection is handled by __intersection__patch (ptype == 3).
437}
438
439extern "C" __global__ void __intersection__voxel() {
440 // Voxel intersection is not yet implemented. Voxel primitives are rejected at
441 // the host level in updateGeometry(), so this program is never invoked.
442}
443
444extern "C" __global__ void __intersection__bbox() {
445 // Bbox intersection is not yet implemented in this backend.
446}
447
448// ---------------------------------------------------------------------------
449// Miss programs
450// ---------------------------------------------------------------------------
451
452extern "C" __global__ void __miss__direct() {
453 PerRayData *prd = getPayloadPRD();
454
455 const uint32_t origin_position = params.primitive_positions[prd->origin_UUID];
456 const uint32_t Nprims = params.Nprimitives;
457 const uint32_t Nbands_global = params.Nbands_global;
458 const uint32_t Nbands_launch = params.Nbands_launch;
459
460 int b = -1;
461 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
462 if (!params.band_launch_flag[b_global]) continue;
463 b++;
464
465 // radiation_in layout: [prim * Nbands_launch + band_launch]
466 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
467
468 // rho/tau layout: [source * Nprims * Nbands_global + prim * Nbands_global + band_global]
469 const uint32_t radprop_ind = prd->source_ID * Nprims * Nbands_global
470 + origin_position * Nbands_global
471 + b_global;
472 const float t_rho = params.rho[radprop_ind];
473 const float t_tau = params.tau[radprop_ind];
474
475 // source_fluxes layout: [source * Nbands_launch + band_launch]
476 const uint32_t flux_idx = prd->source_ID * Nbands_launch + (uint32_t)b;
477 const float source_flux = params.source_fluxes[flux_idx];
478
479 const double strength = prd->strength * (double)source_flux;
480 const float absorption = (float)(strength * (1.0 - t_rho - t_tau));
481
482 atomicFloatAdd(&params.radiation_in[ind_origin], absorption);
483
484 if (t_rho > 0.f || t_tau > 0.f) {
485 if (prd->face) {
486 atomicFloatAdd(&params.scatter_buff_top[ind_origin], (float)(strength * t_rho));
487 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], (float)(strength * t_tau));
488 } else {
489 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], (float)(strength * t_rho));
490 atomicFloatAdd(&params.scatter_buff_top[ind_origin], (float)(strength * t_tau));
491 }
492 }
493
494 // Camera-weighted scatter: mirrors scatter_buff but uses rho_cam/tau_cam
495 if (params.Ncameras > 0 && params.rho_cam && params.scatter_buff_top_cam) {
496 const uint32_t Ncameras = params.Ncameras;
497 const uint32_t cam_id = params.camera_ID;
498 const uint32_t rc_idx = prd->source_ID * Nprims * Nbands_global * Ncameras
499 + origin_position * Nbands_global * Ncameras
500 + b_global * Ncameras + cam_id;
501 const float t_rho_cam = params.rho_cam[rc_idx];
502 const float t_tau_cam = params.tau_cam ? params.tau_cam[rc_idx] : 0.f;
503 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.0) {
504 if (prd->face) {
505 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], (float)(strength * t_rho_cam));
506 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], (float)(strength * t_tau_cam));
507 } else {
508 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], (float)(strength * t_rho_cam));
509 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], (float)(strength * t_tau_cam));
510 }
511 }
512 }
513
514 // Accumulate incident radiation for specular for ALL cameras (per source, camera-weighted).
515 // Direct rays are launched once (not per camera), so we must populate every camera's
516 // slot in radiation_specular here so each camera's closest-hit can read its own data.
517 if (params.radiation_specular && params.source_fluxes_cam && strength > 0.0) {
518 for (uint32_t cam = 0; cam < params.Ncameras; cam++) {
519 // source_fluxes_cam layout: [source][band][camera] (full 3D buffer uploaded in updateSources)
520 const uint32_t weight_idx = prd->source_ID * Nbands_launch * params.Ncameras
521 + (uint32_t)b * params.Ncameras + cam;
522 const float camera_weight = params.source_fluxes_cam[weight_idx];
523 // radiation_specular layout: [source][camera][primitive][band]
524 const uint32_t ind_specular = prd->source_ID * params.Ncameras * Nprims * Nbands_launch
525 + cam * Nprims * Nbands_launch
526 + origin_position * Nbands_launch + (uint32_t)b;
527 atomicFloatAdd(&params.radiation_specular[ind_specular], (float)(strength * camera_weight));
528 }
529 }
530 }
531}
532
533extern "C" __global__ void __miss__diffuse() {
534 PerRayData *prd = getPayloadPRD();
535
536 const uint32_t origin_position = params.primitive_positions[prd->origin_UUID];
537 if (origin_position == UINT_MAX) return;
538
539 const uint32_t Nprims = params.Nprimitives;
540 const uint32_t Nbands_global = params.Nbands_global;
541 const uint32_t Nbands_launch = params.Nbands_launch;
542
543 if (params.diffuse_flux == nullptr) {
544 printf("ERROR (OptiX8 __miss__diffuse): diffuse_flux is null. "
545 "Call updateDiffuseRadiation() before launchDiffuseRays().\n");
546 __trap();
547 }
548
549 const float3 ray_dir = optixGetWorldRayDirection();
550
551 int b = -1;
552 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
553 if (!params.band_launch_flag[b_global]) continue;
554 b++;
555
556 if (params.diffuse_flux[b] <= 0.f) continue;
557
558 const float4 sky_p = params.sky_radiance_params ? params.sky_radiance_params[b] : make_float4(0.f, 0.f, 0.f, 0.f);
559 const float3 peak_d = params.diffuse_peak_dir ? params.diffuse_peak_dir[b] : make_float3(0.f, 0.f, 1.f);
560 const float power_K = params.diffuse_extinction ? params.diffuse_extinction[b] : 0.f;
561 const float power_N = params.diffuse_dist_norm ? params.diffuse_dist_norm[b] : 1.f;
562
563 const float fd = evaluateDiffuseAngularDistribution(ray_dir, peak_d, power_K, power_N, sky_p);
564 const float strength = fd * params.diffuse_flux[b] * (float)prd->strength;
565
566 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
567 const uint32_t radprop_ind = prd->source_ID * Nprims * Nbands_global
568 + origin_position * Nbands_global + b_global;
569 const float t_rho = params.rho[radprop_ind];
570 const float t_tau = params.tau[radprop_ind];
571
572 atomicFloatAdd(&params.radiation_in[ind_origin], strength * (1.f - t_rho - t_tau));
573
574 if (t_rho > 0.f || t_tau > 0.f) {
575 if (prd->face) { // top-face origin
576 atomicFloatAdd(&params.scatter_buff_top[ind_origin], strength * t_rho);
577 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], strength * t_tau);
578 } else { // bottom-face origin
579 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], strength * t_rho);
580 atomicFloatAdd(&params.scatter_buff_top[ind_origin], strength * t_tau);
581 }
582 }
583
584 // Camera-weighted scatter: mirrors scatter_buff but uses rho_cam/tau_cam
585 if (params.Ncameras > 0 && params.rho_cam && params.scatter_buff_top_cam) {
586 const uint32_t Ncameras = params.Ncameras;
587 const uint32_t cam_id = params.camera_ID;
588 const uint32_t rc_idx = prd->source_ID * Nprims * Nbands_global * Ncameras
589 + origin_position * Nbands_global * Ncameras
590 + b_global * Ncameras + cam_id;
591 const float t_rho_cam = params.rho_cam[rc_idx];
592 const float t_tau_cam = params.tau_cam ? params.tau_cam[rc_idx] : 0.f;
593 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.f) {
594 if (prd->face) {
595 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], strength * t_rho_cam);
596 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], strength * t_tau_cam);
597 } else {
598 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], strength * t_rho_cam);
599 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], strength * t_tau_cam);
600 }
601 }
602 }
603 }
604}
605
606extern "C" __global__ void __miss__camera() {
607 PerRayData *prd = getPayloadPRD();
608 const uint32_t pixel_idx = prd->origin_UUID;
609 const uint32_t Nbands_l = params.Nbands_launch;
610 const float3 ray_origin = optixGetWorldRayOrigin();
611 const float3 ray_dir = optixGetWorldRayDirection();
612
613 for (uint32_t b = 0; b < Nbands_l; b++) {
614 float radiance = 0.0f;
615
616 for (uint32_t s = 0; s < params.Nsources; s++) {
617 const float flux = params.source_fluxes[s * Nbands_l + b];
618 if (flux <= 0.0f) continue;
619
620 const uint32_t stype = params.source_types[s];
621 if (stype == 0 || stype == 2) {
622 // Collimated / sun-sphere: treat as solar disk
623 if (params.solar_disk_radiance && params.solar_disk_radiance[b] > 0.0f &&
624 dot(ray_dir, params.sun_direction) >= params.solar_disk_cos_angle) {
625 radiance += params.solar_disk_radiance[b];
626 }
627 } else if (stype == 1) {
628 // Sphere
629 if (d_raySphereIntersect(ray_origin, ray_dir, params.source_positions[s],
630 params.source_widths[s].x * 0.5f)) {
631 const float area = 4.0f * M_PI * params.source_widths[s].x * 0.5f * params.source_widths[s].x * 0.5f;
632 radiance += (flux / area) / M_PI;
633 }
634 } else if (stype == 3) {
635 // Rectangle
636 float cos_angle;
637 if (d_rayRectangleIntersect(ray_origin, ray_dir, params.source_positions[s],
638 params.source_widths[s].x, params.source_widths[s].y,
639 params.source_rotations[s], cos_angle)) {
640 const float area = params.source_widths[s].x * params.source_widths[s].y;
641 radiance += (flux / area) * cos_angle / M_PI;
642 }
643 } else if (stype == 4) {
644 // Disk
645 float cos_angle;
646 if (d_rayDiskIntersect(ray_origin, ray_dir, params.source_positions[s],
647 params.source_widths[s].x, params.source_rotations[s], cos_angle)) {
648 const float area = M_PI * params.source_widths[s].x * params.source_widths[s].x;
649 radiance += (flux / area) * cos_angle / M_PI;
650 }
651 }
652 }
653
654 // Sky radiance fallback
655 if (radiance <= 0.0f && params.camera_sky_radiance && params.camera_sky_radiance[b] > 0.0f) {
656 const float4 sky_p = params.sky_radiance_params ? params.sky_radiance_params[b]
657 : make_float4(0.f, 0.f, 0.f, 0.f);
658 radiance = params.camera_sky_radiance[b] *
659 evaluateDiffuseAngularDistribution(ray_dir, params.sun_direction, 0.0f, 1.0f, sky_p);
660 }
661
662 if (radiance > 0.0f) {
663 atomicFloatAdd(&params.radiation_in_camera[pixel_idx * Nbands_l + b],
664 radiance * (float)prd->strength);
665 }
666 }
667}
668
669extern "C" __global__ void __miss__pixel_label() {
670 PerRayData *prd = getPayloadPRD();
671 if (params.camera_pixel_depth) {
672 params.camera_pixel_depth[prd->origin_UUID] = -1.0f;
673 }
674}
675
676// ---------------------------------------------------------------------------
677// Closest-hit: direct radiation
678// ---------------------------------------------------------------------------
679
680// Shared helper: populate PerRayData with periodic boundary wrapping info.
681// Called from closesthit programs when a bbox (type-5) face is hit.
682// hit_uuid identifies which bbox face was hit (bbox_local = hit_uuid - bbox_UUID_base).
683// Bbox face ordering in RadiationModel.cpp:
684// x-only: 0=x-min, 1=x-max
685// y-only: 0=y-min, 1=y-max
686// xy: 0=x-min, 1=x-max, 2=y-min, 3=y-max
687// Vertex 0 of each face is always the "min" corner, so bbox_vertices[b*4+0] gives
688// the face's position (x or y coordinate) without floating-point tolerance issues.
689static __forceinline__ __device__ void handlePeriodicBoundaryHit(PerRayData *prd, uint32_t hit_uuid) {
690 const float t_hit = optixGetRayTmax();
691 const float3 ray_orig = optixGetWorldRayOrigin();
692 const float3 ray_dir = optixGetWorldRayDirection();
693 const float3 hit_pos = make_float3(ray_orig.x + t_hit * ray_dir.x,
694 ray_orig.y + t_hit * ray_dir.y,
695 ray_orig.z + t_hit * ray_dir.z);
696
697 const uint32_t bbox_local = hit_uuid - params.bbox_UUID_base;
698 prd->periodic_hit = hit_pos;
699
700 if (params.periodic_flag.x == 1 && bbox_local < 2) {
701 // x-faces: bbox 0 = x-min, bbox 1 = x-max
702 // vertex 0 of each face gives the face's x coordinate
703 const float xmin = params.bbox_vertices[0 * 4].x; // x-min face, any vertex .x = xmin
704 const float xmax = params.bbox_vertices[1 * 4].x; // x-max face, any vertex .x = xmax
705 const float width = xmax - xmin;
706 prd->periodic_hit.x += (bbox_local == 0) ? width : -width;
707 } else {
708 // y-faces: if x-periodic exists they are bboxes 2,3; otherwise 0,1
709 const uint32_t y_base = (params.periodic_flag.x == 1) ? 2u : 0u;
710 const float ymin = params.bbox_vertices[y_base * 4].y; // y-min face vertex 0 .y = ymin
711 const float ymax = params.bbox_vertices[(y_base + 1) * 4].y; // y-max face vertex 0 .y = ymax
712 const float width = ymax - ymin;
713 prd->periodic_hit.y += (bbox_local == y_base) ? width : -width;
714 }
715}
716
717extern "C" __global__ void __closesthit__direct() {
718 // A direct ray hit an obstacle — it is simply blocked.
719 // Exception: bbox (type-5) hits trigger periodic boundary wrapping.
720 if (params.periodic_flag.x == 0 && params.periodic_flag.y == 0) return;
721
722 const uint32_t hit_uuid = optixGetAttribute_0();
723 if (params.bbox_UUID_base == 0xFFFFFFFFu || hit_uuid < params.bbox_UUID_base) return;
724
725 PerRayData *prd = getPayloadPRD();
726 prd->hit_periodic_boundary = true;
727 handlePeriodicBoundaryHit(prd, hit_uuid);
728}
729
730// ---------------------------------------------------------------------------
731// Closest-hit: diffuse radiation
732// ---------------------------------------------------------------------------
733
734extern "C" __global__ void __closesthit__diffuse() {
735 const uint32_t hit_uuid = optixGetAttribute_0();
736 const bool face_top = (optixGetAttribute_1() == 1u);
737
738 PerRayData *prd = getPayloadPRD();
739
740 // Periodic boundary: bbox (type-5) hit — wrap ray and return without depositing energy
741 if ((params.periodic_flag.x == 1 || params.periodic_flag.y == 1) &&
742 params.bbox_UUID_base != 0xFFFFFFFFu && hit_uuid >= params.bbox_UUID_base) {
743 prd->hit_periodic_boundary = true;
744 handlePeriodicBoundaryHit(prd, hit_uuid);
745 return;
746 }
747
748 const uint32_t origin_position = params.primitive_positions[prd->origin_UUID];
749 const uint32_t hit_position = params.primitive_positions[hit_uuid];
750 if (origin_position == UINT_MAX || hit_position == UINT_MAX) return;
751
752 const uint32_t Nprims = params.Nprimitives;
753 const uint32_t Nbands_global = params.Nbands_global;
754 const uint32_t Nbands_launch = params.Nbands_launch;
755
756 int b = -1;
757 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
758 if (!params.band_launch_flag[b_global]) continue;
759 b++;
760
761 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
762 const uint32_t ind_hit = hit_position * Nbands_launch + (uint32_t)b;
763
764 const double strength = face_top ? params.radiation_out_top[ind_hit] * prd->strength
765 : params.radiation_out_bottom[ind_hit] * prd->strength;
766 if (strength == 0.0) continue;
767
768 const uint32_t radprop_ind = prd->source_ID * Nprims * Nbands_global
769 + origin_position * Nbands_global + b_global;
770 const float t_rho = params.rho[radprop_ind];
771 const float t_tau = params.tau[radprop_ind];
772
773 atomicFloatAdd(&params.radiation_in[ind_origin], (float)(strength * (1.0 - t_rho - t_tau)));
774
775 if (t_rho > 0.f || t_tau > 0.f) {
776 if (prd->face) { // top-face origin
777 atomicFloatAdd(&params.scatter_buff_top[ind_origin], (float)(strength * t_rho));
778 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], (float)(strength * t_tau));
779 } else { // bottom-face origin
780 atomicFloatAdd(&params.scatter_buff_bottom[ind_origin], (float)(strength * t_rho));
781 atomicFloatAdd(&params.scatter_buff_top[ind_origin], (float)(strength * t_tau));
782 }
783 }
784
785 // Camera-weighted scatter: mirrors scatter_buff but uses rho_cam/tau_cam
786 if (params.Ncameras > 0 && params.rho_cam && params.scatter_buff_top_cam) {
787 const uint32_t Ncameras = params.Ncameras;
788 const uint32_t cam_id = params.camera_ID;
789 const uint32_t rc_idx = prd->source_ID * Nprims * Nbands_global * Ncameras
790 + origin_position * Nbands_global * Ncameras
791 + b_global * Ncameras + cam_id;
792 const float t_rho_cam = params.rho_cam[rc_idx];
793 const float t_tau_cam = params.tau_cam ? params.tau_cam[rc_idx] : 0.f;
794 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.0) {
795 if (prd->face) {
796 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], (float)(strength * t_rho_cam));
797 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], (float)(strength * t_tau_cam));
798 } else {
799 atomicFloatAdd(&params.scatter_buff_bottom_cam[ind_origin], (float)(strength * t_rho_cam));
800 atomicFloatAdd(&params.scatter_buff_top_cam[ind_origin], (float)(strength * t_tau_cam));
801 }
802 }
803 }
804 }
805}
806
807// ---------------------------------------------------------------------------
808// Closest-hit: camera
809// ---------------------------------------------------------------------------
810
811extern "C" __global__ void __closesthit__camera() {
812 const uint32_t hit_uuid = optixGetAttribute_0();
813 PerRayData *prd = getPayloadPRD();
814 const uint32_t hit_position = params.primitive_positions[hit_uuid];
815
816 if (hit_position == 0xFFFFFFFFu) return; // invalid primitive
817
818 // Periodic boundary: treat as transparent wall and re-launch
819 if ((params.periodic_flag.x != 0.f || params.periodic_flag.y != 0.f) &&
820 params.primitive_type[hit_position] == 5) {
821 handlePeriodicBoundaryHit(prd, hit_uuid);
822 prd->hit_periodic_boundary = true;
823 return;
824 }
825
826 const uint32_t pixel_index = prd->origin_UUID;
827 const uint32_t Nbands_l = params.Nbands_launch;
828 const uint32_t Nprims = params.Nprimitives;
829 const float t_hit = optixGetRayTmax();
830 const float3 ray_origin = optixGetWorldRayOrigin();
831 const float3 ray_direction = optixGetWorldRayDirection();
832
833 // Use face attribute from intersection program (correct for both patches and triangles)
834 const bool face_top = (optixGetAttribute_1() == 1u);
835
836 // Compute surface normal for specular reflection (needed for Blinn-Phong half-vector)
837 float T[16];
838 loadTransformMatrix(hit_position, T);
839 float3 n0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, n0);
840 float3 n1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, n1);
841 float3 n2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, n2);
842 float3 normal = normalize(cross(n1 - n0, n2 - n0));
843 // Ensure normal points toward the camera (consistent with face_top)
844 if (face_top != (dot(normal, ray_direction) < 0.f)) {
845 normal = make_float3(-normal.x, -normal.y, -normal.z);
846 }
847
848 for (uint32_t b = 0; b < Nbands_l; b++) {
849 // Radiance from hit surface (outgoing flux / pi = radiance)
850 const uint32_t ind_hit = hit_position * Nbands_l + b;
851 float strength = (float)prd->strength *
852 (float)(face_top ? params.radiation_out_top[ind_hit]
853 : params.radiation_out_bottom[ind_hit]);
854
855 // Check sources visible between camera origin and hit point
856 for (uint32_t s = 0; s < params.Nsources; s++) {
857 const float flux = params.source_fluxes[s * Nbands_l + b];
858 if (flux <= 0.0f) continue;
859
860 const uint32_t stype = params.source_types[s];
861 float source_radiance = 0.0f;
862
863 if (stype == 1) {
864 // Sphere
865 const float radius = params.source_widths[s].x * 0.5f;
866 const float3 oc = make_float3(ray_origin.x - params.source_positions[s].x,
867 ray_origin.y - params.source_positions[s].y,
868 ray_origin.z - params.source_positions[s].z);
869 const float bd = dot(oc, ray_direction);
870 const float cd = dot(oc, oc) - radius * radius;
871 const float disc = bd * bd - cd;
872 if (disc >= 0.0f) {
873 const float t_sphere = -bd - sqrtf(disc);
874 if (t_sphere > 0.0f && t_sphere < t_hit) {
875 const float area = 4.0f * M_PI * radius * radius;
876 source_radiance = (flux / area) / M_PI;
877 }
878 }
879 } else if (stype == 3) {
880 // Rectangle
881 float trans[16];
882 d_makeTransformMatrix(params.source_rotations[s], trans);
883 const float3 snormal = make_float3(trans[2], trans[6], trans[10]);
884 const float denom = dot(ray_direction, snormal);
885 if (denom < -1e-6f) {
886 const float3 oc = make_float3(params.source_positions[s].x - ray_origin.x,
887 params.source_positions[s].y - ray_origin.y,
888 params.source_positions[s].z - ray_origin.z);
889 const float t_r = dot(oc, snormal) / denom;
890 if (t_r > 0.0f && t_r < t_hit) {
891 float3 hp = make_float3(ray_origin.x + t_r * ray_direction.x - params.source_positions[s].x,
892 ray_origin.y + t_r * ray_direction.y - params.source_positions[s].y,
893 ray_origin.z + t_r * ray_direction.z - params.source_positions[s].z);
894 float inv_t[16];
895 d_invertMatrix(trans, inv_t);
896 d_transformPoint(inv_t, hp);
897 if (fabsf(hp.x) <= params.source_widths[s].x * 0.5f &&
898 fabsf(hp.y) <= params.source_widths[s].y * 0.5f) {
899 const float area = params.source_widths[s].x * params.source_widths[s].y;
900 source_radiance = (flux / area) * (-denom) / M_PI;
901 }
902 }
903 }
904 } else if (stype == 4) {
905 // Disk
906 float trans[16];
907 d_makeTransformMatrix(params.source_rotations[s], trans);
908 const float3 snormal = make_float3(trans[2], trans[6], trans[10]);
909 const float denom = dot(ray_direction, snormal);
910 if (denom < -1e-6f) {
911 const float3 oc = make_float3(params.source_positions[s].x - ray_origin.x,
912 params.source_positions[s].y - ray_origin.y,
913 params.source_positions[s].z - ray_origin.z);
914 const float t_d = dot(oc, snormal) / denom;
915 if (t_d > 0.0f && t_d < t_hit) {
916 const float3 hp = make_float3(ray_origin.x + t_d * ray_direction.x - params.source_positions[s].x,
917 ray_origin.y + t_d * ray_direction.y - params.source_positions[s].y,
918 ray_origin.z + t_d * ray_direction.z - params.source_positions[s].z);
919 const float radius = params.source_widths[s].x;
920 if (dot(hp, hp) <= radius * radius) {
921 const float area = M_PI * radius * radius;
922 source_radiance = (flux / area) * (-denom) / M_PI;
923 }
924 }
925 }
926 }
927
928 if (source_radiance > 0.0f) {
929 strength += source_radiance * (float)prd->strength;
930 }
931 }
932
933 // Specular contribution (only if enabled and on iteration 0)
934 float strength_spec = 0.0f;
935 if (params.specular_reflection_enabled > 0 &&
936 params.specular_exponent && params.specular_exponent[hit_position] > 0.f &&
937 params.scattering_iteration == 0 &&
938 params.radiation_specular) {
939 for (uint32_t rr = 0; rr < params.Nsources; rr++) {
940 const uint32_t ind_spec = rr * params.Ncameras * Nprims * Nbands_l
941 + params.camera_ID * Nprims * Nbands_l
942 + hit_position * Nbands_l + b;
943 const float spec = params.radiation_specular[ind_spec] * 0.25f;
944 if (spec > 0.0f) {
945 float3 light_dir;
946 if (params.source_types[rr] == 0 || params.source_types[rr] == 2) {
947 light_dir = normalize(params.source_positions[rr]);
948 } else {
949 const float3 hp = make_float3(ray_origin.x + t_hit * ray_direction.x,
950 ray_origin.y + t_hit * ray_direction.y,
951 ray_origin.z + t_hit * ray_direction.z);
952 light_dir = normalize(make_float3(params.source_positions[rr].x - hp.x,
953 params.source_positions[rr].y - hp.y,
954 params.source_positions[rr].z - hp.z));
955 }
956 const float3 spec_dir = normalize(light_dir - ray_direction);
957 const float exponent = params.specular_exponent[hit_position];
958 float scale_coeff = 1.0f;
959 if (params.specular_reflection_enabled == 2 && params.specular_scale) {
960 scale_coeff = params.specular_scale[hit_position];
961 }
962 const float cos_spec = fmaxf(0.f, dot(spec_dir, normal));
963 strength_spec += spec * scale_coeff
964 * powf(cos_spec, exponent) * (exponent + 2.f)
965 / ((float)params.launch_dim_x * 2.f * M_PI);
966 }
967 }
968 }
969
970 // Accumulate into camera radiation buffer: [pixel][band]
971 atomicFloatAdd(&params.radiation_in_camera[pixel_index * Nbands_l + b],
972 (strength + strength_spec) / M_PI);
973 }
974}
975
976// ---------------------------------------------------------------------------
977// Closest-hit: pixel label
978// ---------------------------------------------------------------------------
979
980extern "C" __global__ void __closesthit__pixel_label() {
981 const uint32_t hit_uuid = optixGetAttribute_0();
982 PerRayData *prd = getPayloadPRD();
983 const uint32_t origin_UUID = prd->origin_UUID;
984 const uint32_t hit_position = params.primitive_positions[hit_uuid];
985
986 // Periodic boundary: treat as transparent wall and re-launch
987 if ((params.periodic_flag.x != 0.f || params.periodic_flag.y != 0.f) &&
988 hit_position != 0xFFFFFFFFu && params.primitive_type[hit_position] == 5) {
989 handlePeriodicBoundaryHit(prd, hit_uuid);
990 prd->hit_periodic_boundary = true;
991 return;
992 }
993
994 // Store UUID+1 (0 is reserved for sky/miss)
995 if (params.camera_pixel_label) {
996 params.camera_pixel_label[origin_UUID] = hit_uuid + 1u;
997 }
998
999 // Depth: project ray parameter along camera view direction
1000 if (params.camera_pixel_depth) {
1001 const float t_hit = optixGetRayTmax() + (float)prd->strength; // strength=0 for pixel label
1002 const float3 ray_dir = optixGetWorldRayDirection();
1003 const float3 cam_dir = d_rotatePoint(make_float3(1.f, 0.f, 0.f),
1004 -0.5f * M_PI + params.camera_direction.x,
1005 0.5f * M_PI - params.camera_direction.y);
1006 params.camera_pixel_depth[origin_UUID] = fabsf(dot(cam_dir, ray_dir)) * t_hit;
1007 }
1008}
1009
1010// ---------------------------------------------------------------------------
1011// Raygen: direct rays
1012// ---------------------------------------------------------------------------
1013
1014extern "C" __global__ void __raygen__direct() {
1015 // 3D launch: x = x-strat index [0, dim_x), y = y-strat index [0, dim_y), z = prim
1016 const uint3 idx = optixGetLaunchIndex();
1017 const uint32_t xi = idx.x;
1018 const uint32_t yi = idx.y;
1019 const uint32_t prim_local = idx.z;
1020
1021 const uint32_t dim_x = params.launch_dim_x;
1022 const uint32_t dim_y = params.launch_dim_y;
1023 const uint32_t Nrays = dim_x * dim_y;
1024 const uint32_t prim_pos = params.launch_offset + prim_local;
1025
1026 if (prim_pos >= params.Nprimitives) return;
1027
1028 const uint32_t ptype = params.primitive_type[prim_pos];
1029 const int32_t NX = params.object_subdivisions[prim_pos * 2];
1030 const int32_t NY = params.object_subdivisions[prim_pos * 2 + 1];
1031
1032 float T[16];
1033 loadTransformMatrix(prim_pos, T);
1034
1035 const uint32_t linear_idx = xi + dim_x * yi;
1036 uint32_t seed = tea<16>(linear_idx + Nrays * prim_local, params.random_seed);
1037
1038 for (int jj = 0; jj < NY; jj++) {
1039 for (int ii = 0; ii < NX; ii++) {
1040
1041 // UUID for this sub-patch
1042 const uint32_t UUID = params.primitiveID[prim_pos] + (uint32_t)(jj * NX + ii);
1043
1044 const float Rx = rnd(seed);
1045 const float Ry = rnd(seed);
1046
1047 float3 sp;
1048 float3 normal;
1049
1050 if (ptype == 0 || ptype == 3) { // Patch or Tile: canonical space [-0.5, 0.5]^2
1051 const float dx = 1.0f / float(NX);
1052 const float dy = 1.0f / float(NY);
1053 sp.x = -0.5f + ii * dx + (float(xi) + Rx) * dx / float(dim_x);
1054 sp.y = -0.5f + jj * dy + (float(yi) + Ry) * dy / float(dim_y);
1055 sp.z = 0.f;
1056
1057 // Origin mask rejection sampling: only launch from opaque regions (matches OptiX 6 raygen behavior)
1058 const int32_t msk_orig = params.mask_IDs[prim_pos];
1059 if (msk_orig >= 0) {
1060 for (int attempt = 0; attempt < 10; attempt++) {
1061 float uv_u, uv_v;
1062 if (params.uv_IDs[prim_pos] >= 0) {
1063 float2 uv0 = params.uv_data[prim_pos * 4 + 0];
1064 float2 uv1 = params.uv_data[prim_pos * 4 + 1];
1065 float2 uv2 = params.uv_data[prim_pos * 4 + 2];
1066 uv_u = uv0.x + (sp.x + 0.5f) * (uv1.x - uv0.x);
1067 uv_v = uv0.y + (sp.y + 0.5f) * (uv2.y - uv0.y);
1068 } else {
1069 uv_u = sp.x + 0.5f;
1070 uv_v = sp.y + 0.5f;
1071 }
1072 if (sampleMask(msk_orig, uv_u, uv_v)) break;
1073 sp.x = -0.5f + (ii + rnd(seed)) * dx;
1074 sp.y = -0.5f + (jj + rnd(seed)) * dy;
1075 }
1076 }
1077
1078 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1079 float3 v1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, v1);
1080 float3 v2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v2);
1081 normal = normalize(cross(v1 - v0, v2 - v0));
1082
1083 } else if (ptype == 1) { // Triangle: canonical space (0,0,0)-(0,1,0)-(1,1,0)
1084 if (Rx < Ry) { sp.x = Rx; sp.y = Ry; }
1085 else { sp.x = Ry; sp.y = Rx; }
1086 sp.z = 0.f;
1087
1088 // Origin mask rejection sampling
1089 const int32_t msk_orig_t = params.mask_IDs[prim_pos];
1090 if (msk_orig_t >= 0) {
1091 for (int attempt = 0; attempt < 10; attempt++) {
1092 float uv_u, uv_v;
1093 if (params.uv_IDs[prim_pos] >= 0) {
1094 float2 uv0 = params.uv_data[prim_pos * 4 + 0];
1095 float2 uv1 = params.uv_data[prim_pos * 4 + 1];
1096 float2 uv2 = params.uv_data[prim_pos * 4 + 2];
1097 const float beta = sp.y - sp.x; // weight at v1
1098 const float gamma = sp.x; // weight at v2
1099 float2 uv = make_float2(
1100 uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
1101 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
1102 uv_u = uv.x;
1103 uv_v = 1.f - uv.y; // Y-flip (matches intersection convention)
1104 } else {
1105 uv_u = sp.y; // = beta + gamma
1106 uv_v = sp.x; // = gamma
1107 }
1108 if (sampleMask(msk_orig_t, uv_u, uv_v)) break;
1109 float Rx2 = rnd(seed), Ry2 = rnd(seed);
1110 if (Rx2 < Ry2) { sp.x = Rx2; sp.y = Ry2; }
1111 else { sp.x = Ry2; sp.y = Rx2; }
1112 }
1113 }
1114
1115 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1116 float3 v1 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v1);
1117 float3 v2 = make_float3(1.f, 1.f, 0.f); d_transformPoint(T, v2);
1118 normal = normalize(cross(v1 - v0, v2 - v0));
1119
1120 } else {
1121 // Unsupported primitive type — should have been caught by updateGeometry().
1122 printf("ERROR (OptiX8 __raygen__direct): unsupported primitive type %u at index %u\n",
1123 ptype, prim_pos);
1124 __trap();
1125 }
1126
1127 // Transform sample point to world space
1128 float3 ray_origin = sp;
1129 d_transformPoint(T, ray_origin);
1130
1131 // Send a ray toward each source
1132 for (uint32_t rr = 0; rr < params.Nsources; rr++) {
1133
1134 const uint32_t src_type = params.source_types[rr];
1135
1136 float3 ray_direction;
1137 float ray_tmax;
1138 double strength;
1139
1140 if (src_type == 0) { // Collimated source
1141 ray_direction = normalize(params.source_positions[rr]);
1142 ray_tmax = 1e38f;
1143 strength = (1.0 / double(dim_x * dim_y)) * (double)fabsf(dot(normal, ray_direction));
1144
1145 } else if (src_type == 1 || src_type == 2) { // Sphere source (type 1 = point, type 2 = sphere)
1146 float theta_s = acos_safe(1.f - 2.f * rnd(seed));
1147 float phi_s = rnd(seed) * 2.f * M_PI;
1148 float3 sphere_pt = make_float3(0.5f * params.source_widths[rr].x * sinf(theta_s) * cosf(phi_s),
1149 0.5f * params.source_widths[rr].x * sinf(theta_s) * sinf(phi_s),
1150 0.5f * params.source_widths[rr].x * cosf(theta_s));
1151 ray_direction = sphere_pt + params.source_positions[rr] - ray_origin;
1152 ray_tmax = d_magnitude(ray_direction);
1153 ray_direction = normalize(ray_direction);
1154
1155 // Integrate over sphere surface for strength
1156 strength = 0.0;
1157 const uint32_t N = 10;
1158 for (uint32_t j = 0; j < N; j++) {
1159 for (uint32_t i = 0; i < N; i++) {
1160 float theta = acos_safe(1.f - 2.f * (float(i) + 0.5f) / float(N));
1161 float phi = (float(j) + 0.5f) * 2.f * M_PI / float(N);
1162 float3 ldir = make_float3(sinf(theta)*cosf(phi), sinf(theta)*sinf(phi), cosf(theta));
1163 if (dot(ldir, ray_direction) < 0.f) {
1164 strength += (1.0 / double(dim_x * dim_y)) * (double)fabsf(dot(normal, ray_direction))
1165 * (double)fabsf(dot(ldir, ray_direction))
1166 / ((double)ray_tmax * (double)ray_tmax)
1167 / double(N * N)
1168 * (double)params.source_widths[rr].x * (double)params.source_widths[rr].x;
1169 }
1170 }
1171 }
1172
1173 } else if (src_type == 3) { // Rectangle source
1174 float light_transform[16];
1175 float3 rot3 = params.source_rotations[rr];
1176 d_makeTransformMatrix(rot3, light_transform);
1177
1178 float3 square_pt;
1179 d_sampleSquare(seed, square_pt);
1180 square_pt = make_float3(params.source_widths[rr].x * square_pt.x, params.source_widths[rr].y * square_pt.y, square_pt.z);
1181 d_transformPoint(light_transform, square_pt);
1182
1183 float3 light_dir = make_float3(0.f, 0.f, 1.f);
1184 d_transformPoint(light_transform, light_dir);
1185
1186 ray_direction = square_pt + params.source_positions[rr] - ray_origin;
1187 if (dot(ray_direction, light_dir) > 0.f) continue; // don't emit from back of source
1188
1189 ray_tmax = d_magnitude(ray_direction);
1190 ray_direction = normalize(ray_direction);
1191 strength = (1.0 / double(dim_x * dim_y))
1192 * (double)fabsf(dot(normal, ray_direction))
1193 * (double)fabsf(dot(light_dir, ray_direction))
1194 / ((double)ray_tmax * (double)ray_tmax)
1195 * (double)params.source_widths[rr].x * (double)params.source_widths[rr].y
1196 / M_PI;
1197
1198 } else if (src_type == 4) { // Disk source
1199 float light_transform[16];
1200 float3 rot3 = params.source_rotations[rr];
1201 d_makeTransformMatrix(rot3, light_transform);
1202
1203 float3 disk_pt;
1204 d_sampleDisk(seed, disk_pt);
1205 d_transformPoint(light_transform, disk_pt);
1206
1207 float3 light_dir = make_float3(0.f, 0.f, 1.f);
1208 d_transformPoint(light_transform, light_dir);
1209
1210 ray_direction = params.source_widths[rr].x * disk_pt + params.source_positions[rr] - ray_origin;
1211 if (dot(ray_direction, light_dir) > 0.f) continue; // don't emit from back of source
1212
1213 ray_tmax = d_magnitude(ray_direction);
1214 ray_direction = normalize(ray_direction);
1215 strength = (1.0 / double(dim_x * dim_y))
1216 * (double)fabsf(dot(normal, ray_direction))
1217 * (double)fabsf(dot(light_dir, ray_direction))
1218 / ((double)ray_tmax * (double)ray_tmax)
1219 * (double)params.source_widths[rr].x * (double)params.source_widths[rr].x;
1220
1221 } else {
1222 continue; // Unknown source type
1223 }
1224
1225 PerRayData prd;
1226 prd.seed = seed;
1227 prd.origin_UUID = UUID;
1228 prd.source_ID = (unsigned char)rr;
1229 prd.hit_periodic_boundary = false;
1230 prd.face = (dot(ray_direction, normal) > 0.f);
1231
1232 // Strength set above per source type
1233 prd.strength = strength;
1234
1235 // Only fire from the face pointing toward source (or two-sided)
1236 const int8_t tsf = params.twosided_flag[prim_pos];
1237 if (!prd.face && tsf == 0) continue;
1238 if (tsf == 3) continue; // reserved flag — skip
1239
1240 uint32_t u0, u1;
1241 float3 current_origin = ray_origin;
1242
1243 for (int wrap = 0; wrap < 10; ++wrap) {
1244 packPointer(&prd, u0, u1);
1245 optixTrace(
1246 params.traversable,
1247 current_origin,
1248 ray_direction,
1249 1e-4f, // tmin
1250 ray_tmax, // tmax
1251 0.f, // time
1252 OptixVisibilityMask(255),
1253 OPTIX_RAY_FLAG_NONE,
1254 0, // SBT offset (direct hit group = 0)
1255 0, // SBT stride
1256 0, // miss SBT index (direct miss = 0)
1257 u0, u1
1258 );
1259 if (!prd.hit_periodic_boundary) break;
1260 current_origin = prd.periodic_hit;
1261 prd.hit_periodic_boundary = false;
1262 }
1263
1264 seed = prd.seed;
1265 }
1266 }
1267 }
1268}
1269
1270// ---------------------------------------------------------------------------
1271// Raygen: diffuse rays
1272// ---------------------------------------------------------------------------
1273
1274extern "C" __global__ void __raygen__diffuse() {
1275 // 3D launch: x=theta_idx, y=phi_idx, z=prim_local
1276 const uint3 idx = optixGetLaunchIndex();
1277 const uint32_t theta_idx = idx.x;
1278 const uint32_t phi_idx = idx.y;
1279 const uint32_t prim_local = idx.z;
1280
1281 const uint32_t dim_x = params.launch_dim_x;
1282 const uint32_t dim_y = params.launch_dim_y;
1283 const uint32_t dimxy = dim_x * dim_y;
1284
1285 const uint32_t prim_pos = params.launch_offset + prim_local;
1286 if (prim_pos >= params.Nprimitives) return;
1287
1288 // Skip bottom-face launch for one-sided primitives
1289 if (params.launch_face == 0 && params.twosided_flag[prim_pos] == 0) return;
1290
1291 const uint32_t ptype = params.primitive_type[prim_pos];
1292 const int32_t NX = params.object_subdivisions[prim_pos * 2];
1293 const int32_t NY = params.object_subdivisions[prim_pos * 2 + 1];
1294
1295 float T[16];
1296 loadTransformMatrix(prim_pos, T);
1297
1298 // Seed once per ray index
1299 const uint32_t linear_idx = theta_idx + dim_x * phi_idx;
1300 uint32_t seed = tea<16>(linear_idx + dimxy * prim_local, params.random_seed);
1301
1302 // Stratified cosine-weighted hemisphere sampling
1303 const float Rt = (theta_idx + rnd(seed)) / float(dim_x);
1304 const float Rp = (phi_idx + rnd(seed)) / float(dim_y);
1305 const float t = asin_safe(sqrtf(Rt));
1306 const float p = 2.f * M_PI * Rp;
1307 float3 ray_dir_canonical;
1308 ray_dir_canonical.x = sinf(t) * cosf(p);
1309 ray_dir_canonical.y = sinf(t) * sinf(p);
1310 ray_dir_canonical.z = cosf(t);
1311
1312 for (int jj = 0; jj < NY; jj++) {
1313 for (int ii = 0; ii < NX; ii++) {
1314
1315 const uint32_t UUID = params.primitiveID[prim_pos] + (uint32_t)(jj * NX + ii);
1316
1317 const float Rx = rnd(seed);
1318 const float Ry = rnd(seed);
1319
1320 float3 sp;
1321 float3 normal;
1322
1323 if (ptype == 0 || ptype == 3) { // Patch or Tile
1324 const float dx = 1.f / float(NX);
1325 const float dy = 1.f / float(NY);
1326 sp.x = -0.5f + (ii + Rx) * dx;
1327 sp.y = -0.5f + (jj + Ry) * dy;
1328 sp.z = 0.f;
1329
1330 // Origin mask rejection sampling
1331 const int32_t msk_orig_d = params.mask_IDs[prim_pos];
1332 if (msk_orig_d >= 0) {
1333 for (int attempt = 0; attempt < 10; attempt++) {
1334 float uv_u, uv_v;
1335 if (params.uv_IDs[prim_pos] >= 0) {
1336 float2 uv0 = params.uv_data[prim_pos * 4 + 0];
1337 float2 uv1 = params.uv_data[prim_pos * 4 + 1];
1338 float2 uv2 = params.uv_data[prim_pos * 4 + 2];
1339 uv_u = uv0.x + (sp.x + 0.5f) * (uv1.x - uv0.x);
1340 uv_v = uv0.y + (sp.y + 0.5f) * (uv2.y - uv0.y);
1341 } else {
1342 uv_u = sp.x + 0.5f;
1343 uv_v = sp.y + 0.5f;
1344 }
1345 if (sampleMask(msk_orig_d, uv_u, uv_v)) break;
1346 sp.x = -0.5f + (ii + rnd(seed)) * dx;
1347 sp.y = -0.5f + (jj + rnd(seed)) * dy;
1348 }
1349 }
1350
1351 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1352 float3 v1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, v1);
1353 float3 v2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v2);
1354 normal = normalize(cross(v1 - v0, v2 - v0));
1355
1356 } else if (ptype == 1) { // Triangle
1357 if (Rx < Ry) { sp.x = Rx; sp.y = Ry; }
1358 else { sp.x = Ry; sp.y = Rx; }
1359 sp.z = 0.f;
1360
1361 // Origin mask rejection sampling
1362 const int32_t msk_orig_dt = params.mask_IDs[prim_pos];
1363 if (msk_orig_dt >= 0) {
1364 for (int attempt = 0; attempt < 10; attempt++) {
1365 float uv_u, uv_v;
1366 if (params.uv_IDs[prim_pos] >= 0) {
1367 float2 uv0 = params.uv_data[prim_pos * 4 + 0];
1368 float2 uv1 = params.uv_data[prim_pos * 4 + 1];
1369 float2 uv2 = params.uv_data[prim_pos * 4 + 2];
1370 const float beta = sp.y - sp.x;
1371 const float gamma = sp.x;
1372 float2 uv = make_float2(
1373 uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
1374 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
1375 uv_u = uv.x;
1376 uv_v = 1.f - uv.y;
1377 } else {
1378 uv_u = sp.y;
1379 uv_v = sp.x;
1380 }
1381 if (sampleMask(msk_orig_dt, uv_u, uv_v)) break;
1382 float Rx2 = rnd(seed), Ry2 = rnd(seed);
1383 if (Rx2 < Ry2) { sp.x = Rx2; sp.y = Ry2; }
1384 else { sp.x = Ry2; sp.y = Rx2; }
1385 }
1386 }
1387
1388 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1389 float3 v1 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v1);
1390 float3 v2 = make_float3(1.f, 1.f, 0.f); d_transformPoint(T, v2);
1391 normal = normalize(cross(v1 - v0, v2 - v0));
1392
1393 } else {
1394 // Unsupported primitive type — should have been caught by updateGeometry().
1395 printf("ERROR (OptiX8 __raygen__diffuse): unsupported primitive type %u at index %u\n",
1396 ptype, prim_pos);
1397 __trap();
1398 }
1399
1400 // Rotate hemisphere direction by primitive normal orientation
1401 float3 ray_dir = d_rotatePoint(ray_dir_canonical,
1402 acos_safe(normal.z),
1403 atan2f(normal.y, normal.x));
1404
1405 // Transform origin point to world space
1406 float3 ray_origin = sp;
1407 d_transformPoint(T, ray_origin);
1408
1409 PerRayData prd;
1410 prd.seed = seed;
1411 prd.origin_UUID = UUID;
1412 prd.source_ID = 0;
1413 prd.hit_periodic_boundary = false;
1414 prd.strength = 1.0 / double(dimxy);
1415
1416 uint32_t u0, u1;
1417
1418 if (params.launch_face == 1 && params.twosided_flag[prim_pos] != 3) {
1419 prd.face = true;
1420 float3 cur_origin = ray_origin;
1421 for (int wrap = 0; wrap < 10; ++wrap) {
1422 packPointer(&prd, u0, u1);
1423 optixTrace(
1424 params.traversable,
1425 cur_origin, ray_dir,
1426 1e-4f, 1e38f, 0.f,
1427 OptixVisibilityMask(255),
1428 OPTIX_RAY_FLAG_NONE,
1429 1, 0, 1, // SBT offset=1 (diffuse hit), stride=0, miss index=1 (diffuse miss)
1430 u0, u1
1431 );
1432 if (!prd.hit_periodic_boundary) break;
1433 cur_origin = prd.periodic_hit;
1434 prd.hit_periodic_boundary = false;
1435 }
1436 } else if (params.launch_face == 0 && params.twosided_flag[prim_pos] == 1) {
1437 prd.face = false;
1438 float3 neg_dir = make_float3(-ray_dir.x, -ray_dir.y, -ray_dir.z);
1439 float3 cur_origin = ray_origin;
1440 for (int wrap = 0; wrap < 10; ++wrap) {
1441 packPointer(&prd, u0, u1);
1442 optixTrace(
1443 params.traversable,
1444 cur_origin, neg_dir,
1445 1e-4f, 1e38f, 0.f,
1446 OptixVisibilityMask(255),
1447 OPTIX_RAY_FLAG_NONE,
1448 1, 0, 1,
1449 u0, u1
1450 );
1451 if (!prd.hit_periodic_boundary) break;
1452 cur_origin = prd.periodic_hit;
1453 prd.hit_periodic_boundary = false;
1454 }
1455 }
1456
1457 seed = prd.seed;
1458 }
1459 }
1460}
1461
1462// ---------------------------------------------------------------------------
1463// Raygen: camera rays
1464// 3D launch: x=ray_within_pixel [0,anti_samples), y=tile_column, z=tile_row
1465// ---------------------------------------------------------------------------
1466
1467extern "C" __global__ void __raygen__camera() {
1468 const uint3 idx = optixGetLaunchIndex();
1469 const uint32_t ray_idx = idx.x; // sample index within pixel
1470 const uint32_t col = idx.y; // tile column
1471 const uint32_t row = idx.z; // tile row
1472
1473 const uint32_t dim_x = params.launch_dim_x; // antialiasing_samples
1474 const uint32_t dim_y = params.launch_dim_y; // tile_width
1475
1476 // Global pixel coordinates
1477 const uint32_t ii = (uint32_t)params.camera_pixel_offset.x + col;
1478 const uint32_t jj = (uint32_t)params.camera_pixel_offset.y + row;
1479
1480 // Linear pixel index in the full image
1481 const uint32_t pixel_index = jj * (uint32_t)params.camera_resolution_full.x + ii;
1482
1483 // Seed: unique per (ray_idx, col, row)
1484 const uint32_t linear_idx = dim_x * col + ray_idx;
1485 uint32_t seed = tea<16>(linear_idx + dim_x * dim_y * row, params.random_seed);
1486
1487 const float Rx = rnd(seed);
1488 const float Ry = rnd(seed);
1489
1490 // Map sub-pixel sample to view-space point on viewplane
1491 // sp.x = viewplane distance, sp.y/sp.z = horizontal/vertical offsets
1492 const float multiplier = 1.0f / params.FOV_aspect_ratio;
1493 float3 sp;
1494 sp.y = -0.5f + ((float)ii + Rx) / (float)params.camera_resolution_full.x;
1495 sp.z = ( 0.5f - ((float)jj + Ry) / (float)params.camera_resolution_full.y) * multiplier;
1496 sp.x = params.camera_viewplane_length;
1497
1498 // Focal point on focus plane
1499 const float3 p = make_float3(
1500 params.camera_focal_length,
1501 sp.y / params.camera_viewplane_length * params.camera_focal_length,
1502 sp.z / params.camera_viewplane_length * params.camera_focal_length);
1503
1504 // Sample lens (pinhole if lens_diameter == 0)
1505 float3 ray_origin = make_float3(0.f, 0.f, 0.f);
1506 if (params.camera_lens_diameter > 0.f) {
1507 float3 disk_sample;
1508 d_sampleDisk(seed, disk_sample);
1509 ray_origin = make_float3(0.f, 0.5f * disk_sample.x * params.camera_lens_diameter,
1510 0.5f * disk_sample.y * params.camera_lens_diameter);
1511 }
1512
1513 float3 ray_direction = make_float3(p.x - ray_origin.x, p.y - ray_origin.y, p.z - ray_origin.z);
1514
1515 // Rotate into world space
1516 const float theta = -0.5f * M_PI + params.camera_direction.x;
1517 const float phi = 0.5f * M_PI - params.camera_direction.y;
1518 ray_origin = d_rotatePoint(ray_origin, theta, phi) + params.camera_position;
1519 ray_direction = d_rotatePoint(ray_direction, theta, phi);
1520 ray_direction = ray_direction * (1.0f / d_magnitude(ray_direction));
1521
1522 PerRayData prd;
1523 prd.strength = 1.0f / (float)dim_x;
1524 prd.origin_UUID = pixel_index;
1525 prd.face = true;
1526 prd.source_ID = 0;
1527 prd.seed = seed;
1528 prd.hit_periodic_boundary = false;
1529
1530 uint32_t p0, p1;
1531 packPointer(&prd, p0, p1);
1532
1533 const float t_min = 1e-5f;
1534 const float t_max = 1e30f;
1535
1536 float3 cur_origin = ray_origin;
1537 for (int wrap = 0; wrap < 10; wrap++) {
1538 prd.hit_periodic_boundary = false;
1539 optixTrace(params.traversable, cur_origin, ray_direction,
1540 t_min, t_max, 0.f,
1541 OptixVisibilityMask(255), OPTIX_RAY_FLAG_NONE,
1542 2u, 0u, 2u, // SBT: offset=2 (camera hit), stride=0, miss=2
1543 p0, p1);
1544 if (!prd.hit_periodic_boundary) break;
1545 cur_origin = prd.periodic_hit;
1546 }
1547}
1548
1549// ---------------------------------------------------------------------------
1550// Raygen: pixel label rays
1551// 3D launch: x=1 (always), y=tile_column, z=tile_row
1552// ---------------------------------------------------------------------------
1553
1554extern "C" __global__ void __raygen__pixel_label() {
1555 const uint3 idx = optixGetLaunchIndex();
1556 const uint32_t col = idx.y;
1557 const uint32_t row = idx.z;
1558
1559 const uint32_t dim_y = params.launch_dim_y; // tile_width
1560
1561 // Global pixel coordinates
1562 const uint32_t ii = (uint32_t)params.camera_pixel_offset.x + col;
1563 const uint32_t jj = (uint32_t)params.camera_pixel_offset.y + row;
1564 const uint32_t pixel_index = jj * (uint32_t)params.camera_resolution_full.x + ii;
1565
1566 uint32_t seed = tea<16>(dim_y * row + col, params.random_seed);
1567
1568 // Center of pixel, no antialiasing jitter
1569 const float multiplier = 1.0f / params.FOV_aspect_ratio;
1570 float3 sp;
1571 sp.y = -0.5f + ((float)ii + 0.5f) / (float)params.camera_resolution_full.x;
1572 sp.z = ( 0.5f - ((float)jj + 0.5f) / (float)params.camera_resolution_full.y) * multiplier;
1573 sp.x = params.camera_viewplane_length;
1574
1575 const float3 p = make_float3(
1576 params.camera_focal_length,
1577 sp.y / params.camera_viewplane_length * params.camera_focal_length,
1578 sp.z / params.camera_viewplane_length * params.camera_focal_length);
1579
1580 float3 ray_origin = make_float3(0.f, 0.f, 0.f);
1581 float3 ray_direction = p;
1582
1583 const float theta = -0.5f * M_PI + params.camera_direction.x;
1584 const float phi = 0.5f * M_PI - params.camera_direction.y;
1585 ray_origin = d_rotatePoint(ray_origin, theta, phi) + params.camera_position;
1586 ray_direction = d_rotatePoint(ray_direction, theta, phi);
1587 ray_direction = ray_direction * (1.0f / d_magnitude(ray_direction));
1588
1589 PerRayData prd;
1590 prd.strength = 0.f; // used as distance offset (always 0 for pixel label)
1591 prd.origin_UUID = pixel_index;
1592 prd.face = true;
1593 prd.source_ID = 0;
1594 prd.seed = seed;
1595 prd.hit_periodic_boundary = false;
1596
1597 uint32_t p0, p1;
1598 packPointer(&prd, p0, p1);
1599
1600 const float t_min = 1e-5f;
1601 const float t_max = 1e30f;
1602
1603 float3 cur_origin = ray_origin;
1604 for (int wrap = 0; wrap < 10; wrap++) {
1605 prd.hit_periodic_boundary = false;
1606 optixTrace(params.traversable, cur_origin, ray_direction,
1607 t_min, t_max, 0.f,
1608 OptixVisibilityMask(255), OPTIX_RAY_FLAG_NONE,
1609 3u, 0u, 3u, // SBT: offset=3 (pixel label hit), stride=0, miss=3
1610 p0, p1);
1611 if (!prd.hit_periodic_boundary) break;
1612 cur_origin = prd.periodic_hit;
1613 }
1614}