17#include <optix_device.h>
30static __forceinline__ __device__
unsigned int lcg(
unsigned int &prev) {
31 const unsigned int LCG_A = 1664525u;
32 const unsigned int LCG_C = 1013904223u;
33 prev = (LCG_A * prev + LCG_C);
34 return prev & 0x00FFFFFF;
37static __forceinline__ __device__
float rnd(
unsigned int &prev) {
38 return (
float)lcg(prev) / (float)0x01000000;
41template<
unsigned int N>
42static __forceinline__ __device__
unsigned int tea(
unsigned int val0,
unsigned int val1) {
43 unsigned int v0 = val0, v1 = val1, s0 = 0;
44 for (
unsigned int n = 0; n < N; n++) {
46 v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + s0) ^ ((v1 >> 5) + 0xc8013ea4);
47 v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + s0) ^ ((v0 >> 5) + 0x7e95761e);
52__device__ __forceinline__
void atomicFloatAdd(
float *address,
float val) {
53 atomicAdd(address, val);
56__device__ __forceinline__
void d_transformPoint(
const float (&T)[16], float3 &v) {
58 V.x = T[0]*v.x + T[1]*v.y + T[2]*v.z + T[3];
59 V.y = T[4]*v.x + T[5]*v.y + T[6]*v.z + T[7];
60 V.z = T[8]*v.x + T[9]*v.y + T[10]*v.z + T[11];
64__device__ __forceinline__ float3 d_rotatePoint(
const float3 &pos,
float theta,
float phi) {
65 float st = sinf(theta), ct = cosf(theta);
66 float sp = sinf(phi), cp = cosf(phi);
68 tmp.x = cp*ct*pos.x + (-sp)*pos.y + cp*st*pos.z;
69 tmp.y = sp*ct*pos.x + cp*pos.y + sp*st*pos.z;
70 tmp.z = -st*pos.x + ct*pos.z;
74__device__ __forceinline__
float d_magnitude(
const float3 v) {
75 return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);
78static __forceinline__ __device__
float acos_safe(
float x) {
79 return acosf(fmaxf(-1.f, fminf(1.f, x)));
82static __forceinline__ __device__
float asin_safe(
float x) {
83 return asinf(fmaxf(-1.f, fminf(1.f, x)));
90static __device__
float evaluateDiffuseAngularDistribution(
const float3 &ray_dir,
const float3 &peak_dir,
91 float power_law_K,
float power_law_norm,
92 const float4 &sky_params) {
93 if (power_law_K > 0.f) {
94 float psi =
acos_safe(dot(peak_dir, ray_dir));
95 psi = fmaxf(psi,
M_PI / 180.f);
96 return powf(psi, -power_law_K) * power_law_norm;
98 if (sky_params.w > 0.f) {
99 float gamma =
acos_safe(dot(ray_dir, peak_dir)) * 180.f /
M_PI;
100 float cos_theta = fmaxf(ray_dir.z, 0.f);
101 float pattern = (1.f + sky_params.x * expf(-gamma / sky_params.y))
102 * (1.f + (sky_params.z - 1.f) * (1.f - cos_theta));
103 return pattern * sky_params.w *
M_PI;
109__device__ __forceinline__
void loadTransformMatrix(uint32_t pos,
float (&T)[16]) {
110 for (
int i = 0; i < 16; i++) {
119static __forceinline__ __device__
bool sampleMask(int32_t msk_id,
float uv_u,
float uv_v) {
120 if (msk_id < 0)
return true;
121 const int32_t width = params.
mask_sizes[msk_id * 2];
122 const int32_t height = params.
mask_sizes[msk_id * 2 + 1];
124 int ix = (int)(floorf(
float(width - 1) * uv_u));
125 int iy = (int)(floorf(
float(height - 1) * (1.f - uv_v)));
126 ix =
max(0,
min(ix, width - 1));
127 iy =
max(0,
min(iy, height - 1));
128 return params.
mask_data[offset + (uint32_t)(iy * width + ix)] != 0u;
133static __forceinline__ __device__
void d_makeTransformMatrix(float3 rotation,
float (&T)[16]) {
134 float sx = sinf(rotation.x), cx = cosf(rotation.x);
135 float sy = sinf(rotation.y), cy = cosf(rotation.y);
136 float sz = sinf(rotation.z), cz = cosf(rotation.z);
137 T[0] = cz * cy; T[1] = cz * sy * sx - sz * cx; T[2] = cz * sy * cx + sz * sx; T[3] = 0.f;
138 T[4] = sz * cy; T[5] = sz * sy * sx + cz * cx; T[6] = sz * sy * cx - cz * sx; T[7] = 0.f;
139 T[8] = -sy; T[9] = cy * sx; T[10] = cy * cx; T[11] = 0.f;
140 T[12] = 0.f; T[13] = 0.f; T[14] = 0.f; T[15] = 1.f;
145static __forceinline__ __device__
void d_sampleDisk(uint32_t &seed, float3 &sample) {
146 float Rx = rnd(seed), Ry = rnd(seed);
147 float sp_x = -1.f + 2.f * Rx;
148 float sp_y = -1.f + 2.f * Ry;
151 if (sp_x > sp_y) { r = sp_x; p = sp_y / sp_x; }
152 else { r = sp_y; p = 2.f - sp_x / sp_y; }
154 if (sp_x < sp_y) { r = -sp_x; p = 4.f + sp_y / sp_x; }
155 else { r = -sp_y; p = (sp_y != 0.f) ? 6.f - sp_x / sp_y : 0.f; }
158 sample = make_float3(r * cosf(p), r * sinf(p), 0.f);
162static __forceinline__ __device__
void d_sampleSquare(uint32_t &seed, float3 &sample) {
163 sample = make_float3(-0.5f + rnd(seed), -0.5f + rnd(seed), 0.f);
167static __forceinline__ __device__
void d_invertMatrix(
const float (&m)[16],
float (&minv)[16]) {
169 inv[0] = m[5]*m[10]*m[15] - m[5]*m[11]*m[14] - m[9]*m[6]*m[15] + m[9]*m[7]*m[14] + m[13]*m[6]*m[11] - m[13]*m[7]*m[10];
170 inv[4] = -m[4]*m[10]*m[15] + m[4]*m[11]*m[14] + m[8]*m[6]*m[15] - m[8]*m[7]*m[14] - m[12]*m[6]*m[11] + m[12]*m[7]*m[10];
171 inv[8] = m[4]*m[9]*m[15] - m[4]*m[11]*m[13] - m[8]*m[5]*m[15] + m[8]*m[7]*m[13] + m[12]*m[5]*m[11] - m[12]*m[7]*m[9];
172 inv[12] = -m[4]*m[9]*m[14] + m[4]*m[10]*m[13] + m[8]*m[5]*m[14] - m[8]*m[6]*m[13] - m[12]*m[5]*m[10] + m[12]*m[6]*m[9];
173 inv[1] = -m[1]*m[10]*m[15] + m[1]*m[11]*m[14] + m[9]*m[2]*m[15] - m[9]*m[3]*m[14] - m[13]*m[2]*m[11] + m[13]*m[3]*m[10];
174 inv[5] = m[0]*m[10]*m[15] - m[0]*m[11]*m[14] - m[8]*m[2]*m[15] + m[8]*m[3]*m[14] + m[12]*m[2]*m[11] - m[12]*m[3]*m[10];
175 inv[9] = -m[0]*m[9]*m[15] + m[0]*m[11]*m[13] + m[8]*m[1]*m[15] - m[8]*m[3]*m[13] - m[12]*m[1]*m[11] + m[12]*m[3]*m[9];
176 inv[13] = m[0]*m[9]*m[14] - m[0]*m[10]*m[13] - m[8]*m[1]*m[14] + m[8]*m[2]*m[13] + m[12]*m[1]*m[10] - m[12]*m[2]*m[9];
177 inv[2] = m[1]*m[6]*m[15] - m[1]*m[7]*m[14] - m[5]*m[2]*m[15] + m[5]*m[3]*m[14] + m[13]*m[2]*m[7] - m[13]*m[3]*m[6];
178 inv[6] = -m[0]*m[6]*m[15] + m[0]*m[7]*m[14] + m[4]*m[2]*m[15] - m[4]*m[3]*m[14] - m[12]*m[2]*m[7] + m[12]*m[3]*m[6];
179 inv[10] = m[0]*m[5]*m[15] - m[0]*m[7]*m[13] - m[4]*m[1]*m[15] + m[4]*m[3]*m[13] + m[12]*m[1]*m[7] - m[12]*m[3]*m[5];
180 inv[14] = -m[0]*m[5]*m[14] + m[0]*m[6]*m[13] + m[4]*m[1]*m[14] - m[4]*m[2]*m[13] - m[12]*m[1]*m[6] + m[12]*m[2]*m[5];
181 inv[3] = -m[1]*m[6]*m[11] + m[1]*m[7]*m[10] + m[5]*m[2]*m[11] - m[5]*m[3]*m[10] - m[9]*m[2]*m[7] + m[9]*m[3]*m[6];
182 inv[7] = m[0]*m[6]*m[11] - m[0]*m[7]*m[10] - m[4]*m[2]*m[11] + m[4]*m[3]*m[10] + m[8]*m[2]*m[7] - m[8]*m[3]*m[6];
183 inv[11] = -m[0]*m[5]*m[11] + m[0]*m[7]*m[9] + m[4]*m[1]*m[11] - m[4]*m[3]*m[9] - m[8]*m[1]*m[7] + m[8]*m[3]*m[5];
184 inv[15] = m[0]*m[5]*m[10] - m[0]*m[6]*m[9] - m[4]*m[1]*m[10] + m[4]*m[2]*m[9] + m[8]*m[1]*m[6] - m[8]*m[2]*m[5];
185 float det = m[0]*inv[0] + m[1]*inv[4] + m[2]*inv[8] + m[3]*inv[12];
187 for (
int i = 0; i < 16; i++) minv[i] = inv[i] * det;
191static __forceinline__ __device__
bool d_raySphereIntersect(
const float3 &ray_origin,
const float3 &ray_direction,
192 const float3 &sphere_center,
float sphere_radius) {
193 const float3 oc = make_float3(ray_origin.x - sphere_center.x, ray_origin.y - sphere_center.y,
194 ray_origin.z - sphere_center.z);
195 const float b = dot(oc, ray_direction);
196 const float c = dot(oc, oc) - sphere_radius * sphere_radius;
197 const float disc = b * b - c;
198 if (disc < 0.0f)
return false;
199 return (-b - sqrtf(disc)) > 0.0f;
203static __forceinline__ __device__
bool d_rayRectangleIntersect(
const float3 &ray_origin,
const float3 &ray_direction,
204 const float3 &rect_center,
float rect_width,
float rect_length,
205 const float3 &rect_rotation,
float &out_cos_angle) {
207 d_makeTransformMatrix(rect_rotation, transform);
208 const float3 normal = make_float3(transform[2], transform[6], transform[10]);
209 const float denom = dot(ray_direction, normal);
210 if (denom >= -1e-6f)
return false;
211 const float3 oc = make_float3(rect_center.x - ray_origin.x, rect_center.y - ray_origin.y,
212 rect_center.z - ray_origin.z);
213 const float t = dot(oc, normal) / denom;
214 if (t <= 0.0f)
return false;
215 float3 hit = make_float3(ray_origin.x + t * ray_direction.x - rect_center.x,
216 ray_origin.y + t * ray_direction.y - rect_center.y,
217 ray_origin.z + t * ray_direction.z - rect_center.z);
219 d_invertMatrix(transform, inv_t);
220 d_transformPoint(inv_t, hit);
221 if (fabsf(hit.x) > rect_width * 0.5f || fabsf(hit.y) > rect_length * 0.5f)
return false;
222 out_cos_angle = -denom;
227static __forceinline__ __device__
bool d_rayDiskIntersect(
const float3 &ray_origin,
const float3 &ray_direction,
228 const float3 &disk_center,
float disk_radius,
229 const float3 &disk_rotation,
float &out_cos_angle) {
231 d_makeTransformMatrix(disk_rotation, transform);
232 const float3 normal = make_float3(transform[2], transform[6], transform[10]);
233 const float denom = dot(ray_direction, normal);
234 if (denom >= -1e-6f)
return false;
235 const float3 oc = make_float3(disk_center.x - ray_origin.x, disk_center.y - ray_origin.y,
236 disk_center.z - ray_origin.z);
237 const float t = dot(oc, normal) / denom;
238 if (t <= 0.0f)
return false;
239 const float3 hit = make_float3(ray_origin.x + t * ray_direction.x - disk_center.x,
240 ray_origin.y + t * ray_direction.y - disk_center.y,
241 ray_origin.z + t * ray_direction.z - disk_center.z);
242 if (dot(hit, hit) > disk_radius * disk_radius)
return false;
243 out_cos_angle = -denom;
262extern "C" __global__
void __intersection__patch() {
263 const uint32_t prim_idx = optixGetPrimitiveIndex();
264 const uint32_t pos = prim_idx;
267 const float3 ray_origin = optixGetWorldRayOrigin();
268 const float3 ray_direction = optixGetWorldRayDirection();
269 const float t_min = optixGetRayTmin();
270 const float t_max = optixGetRayTmax();
284 float3 e1 = make_float3(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z);
285 float3 e3 = make_float3(v3.x - v0.x, v3.y - v0.y, v3.z - v0.z);
286 float3 n =
cross(e1, e3);
287 float nd = dot(ray_direction, n);
288 if (fabsf(nd) < 1e-8f)
return;
290 float t = dot(make_float3(v0.x - ray_origin.x, v0.y - ray_origin.y, v0.z - ray_origin.z), n) / nd;
291 if (t < t_min || t > t_max)
return;
294 float3 hit = make_float3(ray_origin.x + t * ray_direction.x,
295 ray_origin.y + t * ray_direction.y,
296 ray_origin.z + t * ray_direction.z);
297 const float slack = 1e-4f;
298 float mnx = fminf(fminf(v0.x, v1.x), fminf(v2.x, v3.x)) - slack;
299 float mxx = fmaxf(fmaxf(v0.x, v1.x), fmaxf(v2.x, v3.x)) + slack;
300 float mny = fminf(fminf(v0.y, v1.y), fminf(v2.y, v3.y)) - slack;
301 float mxy = fmaxf(fmaxf(v0.y, v1.y), fmaxf(v2.y, v3.y)) + slack;
302 float mnz = fminf(fminf(v0.z, v1.z), fminf(v2.z, v3.z)) - slack;
303 float mxz = fmaxf(fmaxf(v0.z, v1.z), fmaxf(v2.z, v3.z)) + slack;
304 if (hit.x < mnx || hit.x > mxx || hit.y < mny || hit.y > mxy ||
305 hit.z < mnz || hit.z > mxz)
return;
307 optixReportIntersection(t, 0, uuid, 0u);
311 if (ptype != 0 && ptype != 1 && ptype != 3)
return;
314 loadTransformMatrix(pos, T);
316 if (ptype == 0 || ptype == 3) {
319 float3 normal = make_float3(T[2], T[6], T[10]);
320 float nd = dot(ray_direction, normal);
321 if (fabsf(nd) < 1e-8f)
return;
324 float3 patch_origin = make_float3(T[3], T[7], T[11]);
325 float t = dot(patch_origin - ray_origin, normal) / nd;
326 if (t < t_min || t > t_max)
return;
329 float3 hit_local = ray_origin + t * ray_direction - patch_origin;
330 float3 local_x = make_float3(T[0], T[4], T[8]);
331 float3 local_y = make_float3(T[1], T[5], T[9]);
332 float lx2 = dot(local_x, local_x);
333 float ly2 = dot(local_y, local_y);
334 if (lx2 < 1e-12f || ly2 < 1e-12f)
return;
335 float u = dot(hit_local, local_x) / lx2;
336 float v = dot(hit_local, local_y) / ly2;
337 if (u < -0.5f || u > 0.5f || v < -0.5f || v > 0.5f)
return;
340 const int32_t msk_id = params.
mask_IDs[pos];
343 if (params.
uv_IDs[pos] >= 0) {
345 float2 uv0 = params.
uv_data[pos * 4 + 0];
346 float2 uv1 = params.
uv_data[pos * 4 + 1];
347 float2 uv2 = params.
uv_data[pos * 4 + 2];
348 float du = uv1.x - uv0.x;
349 float dv = uv2.y - uv0.y;
350 uv_u = uv0.x + (u + 0.5f) * du;
351 uv_v = uv0.y + (v + 0.5f) * dv;
357 if (!sampleMask(msk_id, uv_u, uv_v))
return;
360 uint32_t face_attr = (nd < 0.f) ? 1u : 0u;
362 optixReportIntersection(t, 0, uuid, face_attr);
367 const float3 v0 = make_float3(T[3], T[7], T[11]);
368 const float3 v1 = make_float3(T[1] + T[3], T[5] + T[7], T[9] + T[11]);
369 const float3 v2 = make_float3(T[0] + T[1] + T[3], T[4] + T[5] + T[7], T[8] + T[9] + T[11]);
372 float a = v0.x - v1.x, b = v0.x - v2.x, c = ray_direction.x, d = v0.x - ray_origin.x;
373 float e = v0.y - v1.y, f = v0.y - v2.y, g = ray_direction.y, h = v0.y - ray_origin.y;
374 float i = v0.z - v1.z, j = v0.z - v2.z, k = ray_direction.z, l = v0.z - ray_origin.z;
376 float m = f * k - g * j, n = h * k - g * l, p = f * l - h * j;
377 float q = g * i - e * k, s = e * j - f * i;
379 float tri_denom = a * m + b * q + c * s;
380 if (fabsf(tri_denom) < 1e-12f)
return;
381 float inv_denom = 1.f / tri_denom;
383 float e1 = d * m - b * n - c * p;
384 float beta = e1 * inv_denom;
385 if (beta < 0.f)
return;
387 float r = e * l - h * i;
388 float e2 = a * n + d * q + c * r;
389 float gamma = e2 * inv_denom;
390 if (gamma < 0.f || beta + gamma > 1.f)
return;
392 float e3 = a * p - b * r + d * s;
393 float t = e3 * inv_denom;
394 if (t < t_min || t > t_max)
return;
397 const int32_t msk_id = params.
mask_IDs[pos];
400 if (params.
uv_IDs[pos] >= 0) {
402 float2 uv0 = params.
uv_data[pos * 4 + 0];
403 float2 uv1 = params.
uv_data[pos * 4 + 1];
404 float2 uv2 = params.
uv_data[pos * 4 + 2];
406 float2 uv = make_float2(uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
407 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
415 if (!sampleMask(msk_id, uv_u, uv_v))
return;
419 float3 edge0 = make_float3(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z);
420 float3 edge1 = make_float3(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z);
421 float3 tri_nrm = make_float3(edge0.y * edge1.z - edge0.z * edge1.y,
422 edge0.z * edge1.x - edge0.x * edge1.z,
423 edge0.x * edge1.y - edge0.y * edge1.x);
424 uint32_t face_attr = (dot(ray_direction, tri_nrm) < 0.f) ? 1u : 0u;
426 optixReportIntersection(t, 0, uuid, face_attr);
430extern "C" __global__
void __intersection__disk() {
435extern "C" __global__
void __intersection__tile() {
439extern "C" __global__
void __intersection__voxel() {
444extern "C" __global__
void __intersection__bbox() {
452extern "C" __global__
void __miss__direct() {
456 const uint32_t Nprims = params.Nprimitives;
457 const uint32_t Nbands_global = params.Nbands_global;
458 const uint32_t Nbands_launch = params.Nbands_launch;
461 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
466 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
469 const uint32_t radprop_ind = prd->
source_ID * Nprims * Nbands_global
470 + origin_position * Nbands_global
472 const float t_rho = params.
rho[radprop_ind];
473 const float t_tau = params.
tau[radprop_ind];
476 const uint32_t flux_idx = prd->
source_ID * Nbands_launch + (uint32_t)b;
479 const double strength = prd->
strength * (double)source_flux;
480 const float absorption = (float)(strength * (1.0 - t_rho - t_tau));
482 atomicFloatAdd(¶ms.
radiation_in[ind_origin], absorption);
484 if (t_rho > 0.f || t_tau > 0.f) {
486 atomicFloatAdd(¶ms.
scatter_buff_top[ind_origin], (
float)(strength * t_rho));
490 atomicFloatAdd(¶ms.
scatter_buff_top[ind_origin], (
float)(strength * t_tau));
496 const uint32_t Ncameras = params.Ncameras;
497 const uint32_t cam_id = params.camera_ID;
498 const uint32_t rc_idx = prd->
source_ID * Nprims * Nbands_global * Ncameras
499 + origin_position * Nbands_global * Ncameras
500 + b_global * Ncameras + cam_id;
501 const float t_rho_cam = params.
rho_cam[rc_idx];
502 const float t_tau_cam = params.
tau_cam ? params.
tau_cam[rc_idx] : 0.f;
503 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.0) {
506 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], (
float)(strength * t_tau_cam));
508 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], (
float)(strength * t_rho_cam));
518 for (uint32_t cam = 0; cam < params.Ncameras; cam++) {
520 const uint32_t weight_idx = prd->
source_ID * Nbands_launch * params.Ncameras
521 + (uint32_t)b * params.Ncameras + cam;
524 const uint32_t ind_specular = prd->
source_ID * params.Ncameras * Nprims * Nbands_launch
525 + cam * Nprims * Nbands_launch
526 + origin_position * Nbands_launch + (uint32_t)b;
527 atomicFloatAdd(¶ms.
radiation_specular[ind_specular], (
float)(strength * camera_weight));
533extern "C" __global__
void __miss__diffuse() {
537 if (origin_position == UINT_MAX)
return;
539 const uint32_t Nprims = params.Nprimitives;
540 const uint32_t Nbands_global = params.Nbands_global;
541 const uint32_t Nbands_launch = params.Nbands_launch;
544 printf(
"ERROR (OptiX8 __miss__diffuse): diffuse_flux is null. "
545 "Call updateDiffuseRadiation() before launchDiffuseRays().\n");
549 const float3 ray_dir = optixGetWorldRayDirection();
552 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
563 const float fd = evaluateDiffuseAngularDistribution(ray_dir, peak_d, power_K, power_N, sky_p);
566 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
567 const uint32_t radprop_ind = prd->
source_ID * Nprims * Nbands_global
568 + origin_position * Nbands_global + b_global;
569 const float t_rho = params.
rho[radprop_ind];
570 const float t_tau = params.
tau[radprop_ind];
572 atomicFloatAdd(¶ms.
radiation_in[ind_origin], strength * (1.f - t_rho - t_tau));
574 if (t_rho > 0.f || t_tau > 0.f) {
586 const uint32_t Ncameras = params.Ncameras;
587 const uint32_t cam_id = params.camera_ID;
588 const uint32_t rc_idx = prd->
source_ID * Nprims * Nbands_global * Ncameras
589 + origin_position * Nbands_global * Ncameras
590 + b_global * Ncameras + cam_id;
591 const float t_rho_cam = params.
rho_cam[rc_idx];
592 const float t_tau_cam = params.
tau_cam ? params.
tau_cam[rc_idx] : 0.f;
593 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.f) {
596 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], strength * t_tau_cam);
598 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], strength * t_rho_cam);
606extern "C" __global__
void __miss__camera() {
609 const uint32_t Nbands_l = params.Nbands_launch;
610 const float3 ray_origin = optixGetWorldRayOrigin();
611 const float3 ray_dir = optixGetWorldRayDirection();
613 for (uint32_t b = 0; b < Nbands_l; b++) {
614 float radiance = 0.0f;
616 for (uint32_t s = 0; s < params.Nsources; s++) {
618 if (flux <= 0.0f)
continue;
620 const uint32_t stype = params.source_types[s];
621 if (stype == 0 || stype == 2) {
624 dot(ray_dir, params.sun_direction) >= params.solar_disk_cos_angle) {
627 }
else if (stype == 1) {
629 if (d_raySphereIntersect(ray_origin, ray_dir, params.source_positions[s],
630 params.source_widths[s].x * 0.5f)) {
631 const float area = 4.0f *
M_PI * params.source_widths[s].x * 0.5f * params.source_widths[s].x * 0.5f;
632 radiance += (flux / area) /
M_PI;
634 }
else if (stype == 3) {
637 if (d_rayRectangleIntersect(ray_origin, ray_dir, params.source_positions[s],
638 params.source_widths[s].x, params.source_widths[s].y,
639 params.source_rotations[s], cos_angle)) {
640 const float area = params.source_widths[s].x * params.source_widths[s].y;
641 radiance += (flux / area) * cos_angle /
M_PI;
643 }
else if (stype == 4) {
646 if (d_rayDiskIntersect(ray_origin, ray_dir, params.source_positions[s],
647 params.source_widths[s].x, params.source_rotations[s], cos_angle)) {
648 const float area =
M_PI * params.source_widths[s].x * params.source_widths[s].x;
649 radiance += (flux / area) * cos_angle /
M_PI;
657 : make_float4(0.f, 0.f, 0.f, 0.f);
659 evaluateDiffuseAngularDistribution(ray_dir, params.sun_direction, 0.0f, 1.0f, sky_p);
662 if (radiance > 0.0f) {
669extern "C" __global__
void __miss__pixel_label() {
671 if (params.camera_pixel_depth) {
672 params.camera_pixel_depth[prd->
origin_UUID] = -1.0f;
689static __forceinline__ __device__
void handlePeriodicBoundaryHit(
PerRayData *prd, uint32_t hit_uuid) {
690 const float t_hit = optixGetRayTmax();
691 const float3 ray_orig = optixGetWorldRayOrigin();
692 const float3 ray_dir = optixGetWorldRayDirection();
693 const float3 hit_pos = make_float3(ray_orig.x + t_hit * ray_dir.x,
694 ray_orig.y + t_hit * ray_dir.y,
695 ray_orig.z + t_hit * ray_dir.z);
705 const float width = xmax - xmin;
706 prd->
periodic_hit.x += (bbox_local == 0) ? width : -width;
709 const uint32_t y_base = (params.
periodic_flag.x == 1) ? 2u : 0u;
712 const float width = ymax - ymin;
713 prd->
periodic_hit.y += (bbox_local == y_base) ? width : -width;
717extern "C" __global__
void __closesthit__direct() {
722 const uint32_t hit_uuid = optixGetAttribute_0();
726 prd->hit_periodic_boundary =
true;
727 handlePeriodicBoundaryHit(prd, hit_uuid);
734extern "C" __global__
void __closesthit__diffuse() {
735 const uint32_t hit_uuid = optixGetAttribute_0();
736 const bool face_top = (optixGetAttribute_1() == 1u);
743 prd->hit_periodic_boundary =
true;
744 handlePeriodicBoundaryHit(prd, hit_uuid);
750 if (origin_position == UINT_MAX || hit_position == UINT_MAX)
return;
752 const uint32_t Nprims = params.Nprimitives;
753 const uint32_t Nbands_global = params.Nbands_global;
754 const uint32_t Nbands_launch = params.Nbands_launch;
757 for (uint32_t b_global = 0; b_global < Nbands_global; b_global++) {
761 const uint32_t ind_origin = origin_position * Nbands_launch + (uint32_t)b;
762 const uint32_t ind_hit = hit_position * Nbands_launch + (uint32_t)b;
766 if (strength == 0.0)
continue;
768 const uint32_t radprop_ind = prd->
source_ID * Nprims * Nbands_global
769 + origin_position * Nbands_global + b_global;
770 const float t_rho = params.
rho[radprop_ind];
771 const float t_tau = params.
tau[radprop_ind];
773 atomicFloatAdd(¶ms.
radiation_in[ind_origin], (
float)(strength * (1.0 - t_rho - t_tau)));
775 if (t_rho > 0.f || t_tau > 0.f) {
777 atomicFloatAdd(¶ms.
scatter_buff_top[ind_origin], (
float)(strength * t_rho));
781 atomicFloatAdd(¶ms.
scatter_buff_top[ind_origin], (
float)(strength * t_tau));
787 const uint32_t Ncameras = params.Ncameras;
788 const uint32_t cam_id = params.camera_ID;
789 const uint32_t rc_idx = prd->
source_ID * Nprims * Nbands_global * Ncameras
790 + origin_position * Nbands_global * Ncameras
791 + b_global * Ncameras + cam_id;
792 const float t_rho_cam = params.
rho_cam[rc_idx];
793 const float t_tau_cam = params.
tau_cam ? params.
tau_cam[rc_idx] : 0.f;
794 if ((t_rho_cam > 0.f || t_tau_cam > 0.f) && strength > 0.0) {
797 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], (
float)(strength * t_tau_cam));
799 atomicFloatAdd(¶ms.scatter_buff_bottom_cam[ind_origin], (
float)(strength * t_rho_cam));
811extern "C" __global__
void __closesthit__camera() {
812 const uint32_t hit_uuid = optixGetAttribute_0();
816 if (hit_position == 0xFFFFFFFFu)
return;
821 handlePeriodicBoundaryHit(prd, hit_uuid);
822 prd->hit_periodic_boundary =
true;
827 const uint32_t Nbands_l = params.Nbands_launch;
828 const uint32_t Nprims = params.Nprimitives;
829 const float t_hit = optixGetRayTmax();
830 const float3 ray_origin = optixGetWorldRayOrigin();
831 const float3 ray_direction = optixGetWorldRayDirection();
834 const bool face_top = (optixGetAttribute_1() == 1u);
838 loadTransformMatrix(hit_position, T);
839 float3 n0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, n0);
840 float3 n1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, n1);
841 float3 n2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, n2);
842 float3 normal = normalize(
cross(n1 - n0, n2 - n0));
844 if (face_top != (dot(normal, ray_direction) < 0.f)) {
845 normal = make_float3(-normal.x, -normal.y, -normal.z);
848 for (uint32_t b = 0; b < Nbands_l; b++) {
850 const uint32_t ind_hit = hit_position * Nbands_l + b;
851 float strength = (float)prd->
strength *
856 for (uint32_t s = 0; s < params.Nsources; s++) {
858 if (flux <= 0.0f)
continue;
860 const uint32_t stype = params.source_types[s];
861 float source_radiance = 0.0f;
865 const float radius = params.source_widths[s].x * 0.5f;
866 const float3 oc = make_float3(ray_origin.x - params.source_positions[s].x,
867 ray_origin.y - params.source_positions[s].y,
868 ray_origin.z - params.source_positions[s].z);
869 const float bd = dot(oc, ray_direction);
870 const float cd = dot(oc, oc) - radius * radius;
871 const float disc = bd * bd - cd;
873 const float t_sphere = -bd - sqrtf(disc);
874 if (t_sphere > 0.0f && t_sphere < t_hit) {
875 const float area = 4.0f *
M_PI * radius * radius;
876 source_radiance = (flux / area) /
M_PI;
879 }
else if (stype == 3) {
882 d_makeTransformMatrix(params.source_rotations[s], trans);
883 const float3 snormal = make_float3(trans[2], trans[6], trans[10]);
884 const float denom = dot(ray_direction, snormal);
885 if (denom < -1e-6f) {
886 const float3 oc = make_float3(params.source_positions[s].x - ray_origin.x,
887 params.source_positions[s].y - ray_origin.y,
888 params.source_positions[s].z - ray_origin.z);
889 const float t_r = dot(oc, snormal) / denom;
890 if (t_r > 0.0f && t_r < t_hit) {
891 float3 hp = make_float3(ray_origin.x + t_r * ray_direction.x - params.source_positions[s].x,
892 ray_origin.y + t_r * ray_direction.y - params.source_positions[s].y,
893 ray_origin.z + t_r * ray_direction.z - params.source_positions[s].z);
895 d_invertMatrix(trans, inv_t);
896 d_transformPoint(inv_t, hp);
897 if (fabsf(hp.x) <= params.source_widths[s].x * 0.5f &&
898 fabsf(hp.y) <= params.source_widths[s].y * 0.5f) {
899 const float area = params.source_widths[s].x * params.source_widths[s].y;
900 source_radiance = (flux / area) * (-denom) /
M_PI;
904 }
else if (stype == 4) {
907 d_makeTransformMatrix(params.source_rotations[s], trans);
908 const float3 snormal = make_float3(trans[2], trans[6], trans[10]);
909 const float denom = dot(ray_direction, snormal);
910 if (denom < -1e-6f) {
911 const float3 oc = make_float3(params.source_positions[s].x - ray_origin.x,
912 params.source_positions[s].y - ray_origin.y,
913 params.source_positions[s].z - ray_origin.z);
914 const float t_d = dot(oc, snormal) / denom;
915 if (t_d > 0.0f && t_d < t_hit) {
916 const float3 hp = make_float3(ray_origin.x + t_d * ray_direction.x - params.source_positions[s].x,
917 ray_origin.y + t_d * ray_direction.y - params.source_positions[s].y,
918 ray_origin.z + t_d * ray_direction.z - params.source_positions[s].z);
919 const float radius = params.source_widths[s].x;
920 if (dot(hp, hp) <= radius * radius) {
921 const float area =
M_PI * radius * radius;
922 source_radiance = (flux / area) * (-denom) /
M_PI;
928 if (source_radiance > 0.0f) {
929 strength += source_radiance * (float)prd->
strength;
934 float strength_spec = 0.0f;
935 if (params.specular_reflection_enabled > 0 &&
936 params.specular_exponent && params.specular_exponent[hit_position] > 0.f &&
937 params.scattering_iteration == 0 &&
939 for (uint32_t rr = 0; rr < params.Nsources; rr++) {
940 const uint32_t ind_spec = rr * params.Ncameras * Nprims * Nbands_l
941 + params.camera_ID * Nprims * Nbands_l
942 + hit_position * Nbands_l + b;
946 if (params.source_types[rr] == 0 || params.source_types[rr] == 2) {
947 light_dir = normalize(params.source_positions[rr]);
949 const float3 hp = make_float3(ray_origin.x + t_hit * ray_direction.x,
950 ray_origin.y + t_hit * ray_direction.y,
951 ray_origin.z + t_hit * ray_direction.z);
952 light_dir = normalize(make_float3(params.source_positions[rr].x - hp.x,
953 params.source_positions[rr].y - hp.y,
954 params.source_positions[rr].z - hp.z));
956 const float3 spec_dir = normalize(light_dir - ray_direction);
957 const float exponent = params.specular_exponent[hit_position];
958 float scale_coeff = 1.0f;
959 if (params.specular_reflection_enabled == 2 && params.specular_scale) {
960 scale_coeff = params.specular_scale[hit_position];
962 const float cos_spec = fmaxf(0.f, dot(spec_dir, normal));
963 strength_spec += spec * scale_coeff
964 * powf(cos_spec, exponent) * (exponent + 2.f)
965 / ((
float)params.launch_dim_x * 2.f *
M_PI);
972 (strength + strength_spec) /
M_PI);
980extern "C" __global__
void __closesthit__pixel_label() {
981 const uint32_t hit_uuid = optixGetAttribute_0();
988 hit_position != 0xFFFFFFFFu && params.
primitive_type[hit_position] == 5) {
989 handlePeriodicBoundaryHit(prd, hit_uuid);
990 prd->hit_periodic_boundary =
true;
995 if (params.camera_pixel_label) {
996 params.camera_pixel_label[origin_UUID] = hit_uuid + 1u;
1000 if (params.camera_pixel_depth) {
1001 const float t_hit = optixGetRayTmax() + (float)prd->
strength;
1002 const float3 ray_dir = optixGetWorldRayDirection();
1003 const float3 cam_dir = d_rotatePoint(make_float3(1.f, 0.f, 0.f),
1006 params.camera_pixel_depth[origin_UUID] = fabsf(dot(cam_dir, ray_dir)) * t_hit;
1014extern "C" __global__
void __raygen__direct() {
1016 const uint3 idx = optixGetLaunchIndex();
1017 const uint32_t xi = idx.x;
1018 const uint32_t yi = idx.y;
1019 const uint32_t prim_local = idx.z;
1021 const uint32_t dim_x = params.launch_dim_x;
1022 const uint32_t dim_y = params.launch_dim_y;
1023 const uint32_t Nrays = dim_x * dim_y;
1024 const uint32_t prim_pos = params.
launch_offset + prim_local;
1026 if (prim_pos >= params.Nprimitives)
return;
1033 loadTransformMatrix(prim_pos, T);
1035 const uint32_t linear_idx = xi + dim_x * yi;
1036 uint32_t seed = tea<16>(linear_idx + Nrays * prim_local, params.random_seed);
1038 for (
int jj = 0; jj < NY; jj++) {
1039 for (
int ii = 0; ii < NX; ii++) {
1042 const uint32_t UUID = params.
primitiveID[prim_pos] + (uint32_t)(jj * NX + ii);
1044 const float Rx = rnd(seed);
1045 const float Ry = rnd(seed);
1050 if (ptype == 0 || ptype == 3) {
1051 const float dx = 1.0f / float(NX);
1052 const float dy = 1.0f / float(NY);
1053 sp.x = -0.5f + ii * dx + (float(xi) + Rx) * dx /
float(dim_x);
1054 sp.y = -0.5f + jj * dy + (float(yi) + Ry) * dy /
float(dim_y);
1058 const int32_t msk_orig = params.
mask_IDs[prim_pos];
1059 if (msk_orig >= 0) {
1060 for (
int attempt = 0; attempt < 10; attempt++) {
1062 if (params.
uv_IDs[prim_pos] >= 0) {
1063 float2 uv0 = params.
uv_data[prim_pos * 4 + 0];
1064 float2 uv1 = params.
uv_data[prim_pos * 4 + 1];
1065 float2 uv2 = params.
uv_data[prim_pos * 4 + 2];
1066 uv_u = uv0.x + (sp.x + 0.5f) * (uv1.x - uv0.x);
1067 uv_v = uv0.y + (sp.y + 0.5f) * (uv2.y - uv0.y);
1072 if (sampleMask(msk_orig, uv_u, uv_v))
break;
1073 sp.x = -0.5f + (ii + rnd(seed)) * dx;
1074 sp.y = -0.5f + (jj + rnd(seed)) * dy;
1078 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1079 float3 v1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, v1);
1080 float3 v2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v2);
1081 normal = normalize(
cross(v1 - v0, v2 - v0));
1083 }
else if (ptype == 1) {
1084 if (Rx < Ry) { sp.x = Rx; sp.y = Ry; }
1085 else { sp.x = Ry; sp.y = Rx; }
1089 const int32_t msk_orig_t = params.
mask_IDs[prim_pos];
1090 if (msk_orig_t >= 0) {
1091 for (
int attempt = 0; attempt < 10; attempt++) {
1093 if (params.
uv_IDs[prim_pos] >= 0) {
1094 float2 uv0 = params.
uv_data[prim_pos * 4 + 0];
1095 float2 uv1 = params.
uv_data[prim_pos * 4 + 1];
1096 float2 uv2 = params.
uv_data[prim_pos * 4 + 2];
1097 const float beta = sp.y - sp.x;
1098 const float gamma = sp.x;
1099 float2 uv = make_float2(
1100 uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
1101 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
1108 if (sampleMask(msk_orig_t, uv_u, uv_v))
break;
1109 float Rx2 = rnd(seed), Ry2 = rnd(seed);
1110 if (Rx2 < Ry2) { sp.x = Rx2; sp.y = Ry2; }
1111 else { sp.x = Ry2; sp.y = Rx2; }
1115 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1116 float3 v1 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v1);
1117 float3 v2 = make_float3(1.f, 1.f, 0.f); d_transformPoint(T, v2);
1118 normal = normalize(
cross(v1 - v0, v2 - v0));
1122 printf(
"ERROR (OptiX8 __raygen__direct): unsupported primitive type %u at index %u\n",
1128 float3 ray_origin = sp;
1129 d_transformPoint(T, ray_origin);
1132 for (uint32_t rr = 0; rr < params.Nsources; rr++) {
1134 const uint32_t src_type = params.source_types[rr];
1136 float3 ray_direction;
1140 if (src_type == 0) {
1141 ray_direction = normalize(params.source_positions[rr]);
1143 strength = (1.0 / double(dim_x * dim_y)) * (
double)fabsf(dot(normal, ray_direction));
1145 }
else if (src_type == 1 || src_type == 2) {
1146 float theta_s =
acos_safe(1.f - 2.f * rnd(seed));
1147 float phi_s = rnd(seed) * 2.f *
M_PI;
1148 float3 sphere_pt = make_float3(0.5f * params.source_widths[rr].x * sinf(theta_s) * cosf(phi_s),
1149 0.5f * params.source_widths[rr].x * sinf(theta_s) * sinf(phi_s),
1150 0.5f * params.source_widths[rr].x * cosf(theta_s));
1151 ray_direction = sphere_pt + params.source_positions[rr] - ray_origin;
1152 ray_tmax = d_magnitude(ray_direction);
1153 ray_direction = normalize(ray_direction);
1157 const uint32_t N = 10;
1158 for (uint32_t j = 0; j < N; j++) {
1159 for (uint32_t i = 0; i < N; i++) {
1160 float theta =
acos_safe(1.f - 2.f * (
float(i) + 0.5f) /
float(N));
1161 float phi = (float(j) + 0.5f) * 2.f *
M_PI /
float(N);
1162 float3 ldir = make_float3(sinf(theta)*cosf(phi), sinf(theta)*sinf(phi), cosf(theta));
1163 if (dot(ldir, ray_direction) < 0.f) {
1164 strength += (1.0 / double(dim_x * dim_y)) * (
double)fabsf(dot(normal, ray_direction))
1165 * (double)fabsf(dot(ldir, ray_direction))
1166 / ((
double)ray_tmax * (double)ray_tmax)
1168 * (double)params.source_widths[rr].x * (
double)params.source_widths[rr].x;
1173 }
else if (src_type == 3) {
1174 float light_transform[16];
1175 float3 rot3 = params.source_rotations[rr];
1176 d_makeTransformMatrix(rot3, light_transform);
1179 d_sampleSquare(seed, square_pt);
1180 square_pt = make_float3(params.source_widths[rr].x * square_pt.x, params.source_widths[rr].y * square_pt.y, square_pt.z);
1181 d_transformPoint(light_transform, square_pt);
1183 float3 light_dir = make_float3(0.f, 0.f, 1.f);
1184 d_transformPoint(light_transform, light_dir);
1186 ray_direction = square_pt + params.source_positions[rr] - ray_origin;
1187 if (dot(ray_direction, light_dir) > 0.f)
continue;
1189 ray_tmax = d_magnitude(ray_direction);
1190 ray_direction = normalize(ray_direction);
1191 strength = (1.0 / double(dim_x * dim_y))
1192 * (
double)fabsf(dot(normal, ray_direction))
1193 * (double)fabsf(dot(light_dir, ray_direction))
1194 / ((
double)ray_tmax * (double)ray_tmax)
1195 * (double)params.source_widths[rr].x * (
double)params.source_widths[rr].y
1198 }
else if (src_type == 4) {
1199 float light_transform[16];
1200 float3 rot3 = params.source_rotations[rr];
1201 d_makeTransformMatrix(rot3, light_transform);
1204 d_sampleDisk(seed, disk_pt);
1205 d_transformPoint(light_transform, disk_pt);
1207 float3 light_dir = make_float3(0.f, 0.f, 1.f);
1208 d_transformPoint(light_transform, light_dir);
1210 ray_direction = params.source_widths[rr].x * disk_pt + params.source_positions[rr] - ray_origin;
1211 if (dot(ray_direction, light_dir) > 0.f)
continue;
1213 ray_tmax = d_magnitude(ray_direction);
1214 ray_direction = normalize(ray_direction);
1215 strength = (1.0 / double(dim_x * dim_y))
1216 * (
double)fabsf(dot(normal, ray_direction))
1217 * (double)fabsf(dot(light_dir, ray_direction))
1218 / ((
double)ray_tmax * (double)ray_tmax)
1219 * (double)params.source_widths[rr].x * (
double)params.source_widths[rr].x;
1229 prd.hit_periodic_boundary =
false;
1230 prd.
face = (dot(ray_direction, normal) > 0.f);
1237 if (!prd.
face && tsf == 0)
continue;
1238 if (tsf == 3)
continue;
1241 float3 current_origin = ray_origin;
1243 for (
int wrap = 0; wrap < 10; ++wrap) {
1244 packPointer(&prd, u0, u1);
1252 OptixVisibilityMask(255),
1253 OPTIX_RAY_FLAG_NONE,
1259 if (!prd.hit_periodic_boundary)
break;
1261 prd.hit_periodic_boundary =
false;
1274extern "C" __global__
void __raygen__diffuse() {
1276 const uint3 idx = optixGetLaunchIndex();
1277 const uint32_t theta_idx = idx.x;
1278 const uint32_t phi_idx = idx.y;
1279 const uint32_t prim_local = idx.z;
1281 const uint32_t dim_x = params.launch_dim_x;
1282 const uint32_t dim_y = params.launch_dim_y;
1283 const uint32_t dimxy = dim_x * dim_y;
1285 const uint32_t prim_pos = params.
launch_offset + prim_local;
1286 if (prim_pos >= params.Nprimitives)
return;
1296 loadTransformMatrix(prim_pos, T);
1299 const uint32_t linear_idx = theta_idx + dim_x * phi_idx;
1300 uint32_t seed = tea<16>(linear_idx + dimxy * prim_local, params.random_seed);
1303 const float Rt = (theta_idx + rnd(seed)) /
float(dim_x);
1304 const float Rp = (phi_idx + rnd(seed)) /
float(dim_y);
1306 const float p = 2.f *
M_PI * Rp;
1307 float3 ray_dir_canonical;
1308 ray_dir_canonical.x = sinf(t) * cosf(p);
1309 ray_dir_canonical.y = sinf(t) * sinf(p);
1310 ray_dir_canonical.z = cosf(t);
1312 for (
int jj = 0; jj < NY; jj++) {
1313 for (
int ii = 0; ii < NX; ii++) {
1315 const uint32_t UUID = params.
primitiveID[prim_pos] + (uint32_t)(jj * NX + ii);
1317 const float Rx = rnd(seed);
1318 const float Ry = rnd(seed);
1323 if (ptype == 0 || ptype == 3) {
1324 const float dx = 1.f / float(NX);
1325 const float dy = 1.f / float(NY);
1326 sp.x = -0.5f + (ii + Rx) * dx;
1327 sp.y = -0.5f + (jj + Ry) * dy;
1331 const int32_t msk_orig_d = params.
mask_IDs[prim_pos];
1332 if (msk_orig_d >= 0) {
1333 for (
int attempt = 0; attempt < 10; attempt++) {
1335 if (params.
uv_IDs[prim_pos] >= 0) {
1336 float2 uv0 = params.
uv_data[prim_pos * 4 + 0];
1337 float2 uv1 = params.
uv_data[prim_pos * 4 + 1];
1338 float2 uv2 = params.
uv_data[prim_pos * 4 + 2];
1339 uv_u = uv0.x + (sp.x + 0.5f) * (uv1.x - uv0.x);
1340 uv_v = uv0.y + (sp.y + 0.5f) * (uv2.y - uv0.y);
1345 if (sampleMask(msk_orig_d, uv_u, uv_v))
break;
1346 sp.x = -0.5f + (ii + rnd(seed)) * dx;
1347 sp.y = -0.5f + (jj + rnd(seed)) * dy;
1351 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1352 float3 v1 = make_float3(1.f, 0.f, 0.f); d_transformPoint(T, v1);
1353 float3 v2 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v2);
1354 normal = normalize(
cross(v1 - v0, v2 - v0));
1356 }
else if (ptype == 1) {
1357 if (Rx < Ry) { sp.x = Rx; sp.y = Ry; }
1358 else { sp.x = Ry; sp.y = Rx; }
1362 const int32_t msk_orig_dt = params.
mask_IDs[prim_pos];
1363 if (msk_orig_dt >= 0) {
1364 for (
int attempt = 0; attempt < 10; attempt++) {
1366 if (params.
uv_IDs[prim_pos] >= 0) {
1367 float2 uv0 = params.
uv_data[prim_pos * 4 + 0];
1368 float2 uv1 = params.
uv_data[prim_pos * 4 + 1];
1369 float2 uv2 = params.
uv_data[prim_pos * 4 + 2];
1370 const float beta = sp.y - sp.x;
1371 const float gamma = sp.x;
1372 float2 uv = make_float2(
1373 uv0.x + beta * (uv1.x - uv0.x) + gamma * (uv2.x - uv0.x),
1374 uv0.y + beta * (uv1.y - uv0.y) + gamma * (uv2.y - uv0.y));
1381 if (sampleMask(msk_orig_dt, uv_u, uv_v))
break;
1382 float Rx2 = rnd(seed), Ry2 = rnd(seed);
1383 if (Rx2 < Ry2) { sp.x = Rx2; sp.y = Ry2; }
1384 else { sp.x = Ry2; sp.y = Rx2; }
1388 float3 v0 = make_float3(0.f, 0.f, 0.f); d_transformPoint(T, v0);
1389 float3 v1 = make_float3(0.f, 1.f, 0.f); d_transformPoint(T, v1);
1390 float3 v2 = make_float3(1.f, 1.f, 0.f); d_transformPoint(T, v2);
1391 normal = normalize(
cross(v1 - v0, v2 - v0));
1395 printf(
"ERROR (OptiX8 __raygen__diffuse): unsupported primitive type %u at index %u\n",
1401 float3 ray_dir = d_rotatePoint(ray_dir_canonical,
1403 atan2f(normal.y, normal.x));
1406 float3 ray_origin = sp;
1407 d_transformPoint(T, ray_origin);
1413 prd.hit_periodic_boundary =
false;
1414 prd.
strength = 1.0 / double(dimxy);
1420 float3 cur_origin = ray_origin;
1421 for (
int wrap = 0; wrap < 10; ++wrap) {
1422 packPointer(&prd, u0, u1);
1425 cur_origin, ray_dir,
1427 OptixVisibilityMask(255),
1428 OPTIX_RAY_FLAG_NONE,
1432 if (!prd.hit_periodic_boundary)
break;
1434 prd.hit_periodic_boundary =
false;
1438 float3 neg_dir = make_float3(-ray_dir.x, -ray_dir.y, -ray_dir.z);
1439 float3 cur_origin = ray_origin;
1440 for (
int wrap = 0; wrap < 10; ++wrap) {
1441 packPointer(&prd, u0, u1);
1444 cur_origin, neg_dir,
1446 OptixVisibilityMask(255),
1447 OPTIX_RAY_FLAG_NONE,
1451 if (!prd.hit_periodic_boundary)
break;
1453 prd.hit_periodic_boundary =
false;
1467extern "C" __global__
void __raygen__camera() {
1468 const uint3 idx = optixGetLaunchIndex();
1469 const uint32_t ray_idx = idx.x;
1470 const uint32_t col = idx.y;
1471 const uint32_t row = idx.z;
1473 const uint32_t dim_x = params.launch_dim_x;
1474 const uint32_t dim_y = params.launch_dim_y;
1477 const uint32_t ii = (uint32_t)params.camera_pixel_offset.x + col;
1478 const uint32_t jj = (uint32_t)params.camera_pixel_offset.y + row;
1481 const uint32_t pixel_index = jj * (uint32_t)params.camera_resolution_full.x + ii;
1484 const uint32_t linear_idx = dim_x * col + ray_idx;
1485 uint32_t seed = tea<16>(linear_idx + dim_x * dim_y * row, params.random_seed);
1487 const float Rx = rnd(seed);
1488 const float Ry = rnd(seed);
1492 const float multiplier = 1.0f / params.FOV_aspect_ratio;
1494 sp.y = -0.5f + ((float)ii + Rx) / (float)params.camera_resolution_full.x;
1495 sp.z = ( 0.5f - ((float)jj + Ry) / (float)params.camera_resolution_full.y) * multiplier;
1496 sp.x = params.camera_viewplane_length;
1499 const float3 p = make_float3(
1500 params.camera_focal_length,
1501 sp.y / params.camera_viewplane_length * params.camera_focal_length,
1502 sp.z / params.camera_viewplane_length * params.camera_focal_length);
1505 float3 ray_origin = make_float3(0.f, 0.f, 0.f);
1506 if (params.camera_lens_diameter > 0.f) {
1508 d_sampleDisk(seed, disk_sample);
1509 ray_origin = make_float3(0.f, 0.5f * disk_sample.x * params.camera_lens_diameter,
1510 0.5f * disk_sample.y * params.camera_lens_diameter);
1513 float3 ray_direction = make_float3(p.x - ray_origin.x, p.y - ray_origin.y, p.z - ray_origin.z);
1518 ray_origin = d_rotatePoint(ray_origin, theta, phi) + params.camera_position;
1519 ray_direction = d_rotatePoint(ray_direction, theta, phi);
1520 ray_direction = ray_direction * (1.0f / d_magnitude(ray_direction));
1523 prd.
strength = 1.0f / (float)dim_x;
1524 prd.origin_UUID = pixel_index;
1528 prd.hit_periodic_boundary =
false;
1531 packPointer(&prd, p0, p1);
1533 const float t_min = 1e-5f;
1534 const float t_max = 1e30f;
1536 float3 cur_origin = ray_origin;
1537 for (
int wrap = 0; wrap < 10; wrap++) {
1538 prd.hit_periodic_boundary =
false;
1539 optixTrace(params.traversable, cur_origin, ray_direction,
1541 OptixVisibilityMask(255), OPTIX_RAY_FLAG_NONE,
1544 if (!prd.hit_periodic_boundary)
break;
1545 cur_origin = prd.periodic_hit;
1554extern "C" __global__
void __raygen__pixel_label() {
1555 const uint3 idx = optixGetLaunchIndex();
1556 const uint32_t col = idx.y;
1557 const uint32_t row = idx.z;
1559 const uint32_t dim_y = params.launch_dim_y;
1562 const uint32_t ii = (uint32_t)params.camera_pixel_offset.x + col;
1563 const uint32_t jj = (uint32_t)params.camera_pixel_offset.y + row;
1564 const uint32_t pixel_index = jj * (uint32_t)params.camera_resolution_full.x + ii;
1566 uint32_t seed = tea<16>(dim_y * row + col, params.random_seed);
1569 const float multiplier = 1.0f / params.FOV_aspect_ratio;
1571 sp.y = -0.5f + ((float)ii + 0.5f) / (float)params.camera_resolution_full.x;
1572 sp.z = ( 0.5f - ((float)jj + 0.5f) / (float)params.camera_resolution_full.y) * multiplier;
1573 sp.x = params.camera_viewplane_length;
1575 const float3 p = make_float3(
1576 params.camera_focal_length,
1577 sp.y / params.camera_viewplane_length * params.camera_focal_length,
1578 sp.z / params.camera_viewplane_length * params.camera_focal_length);
1580 float3 ray_origin = make_float3(0.f, 0.f, 0.f);
1581 float3 ray_direction = p;
1585 ray_origin = d_rotatePoint(ray_origin, theta, phi) + params.camera_position;
1586 ray_direction = d_rotatePoint(ray_direction, theta, phi);
1587 ray_direction = ray_direction * (1.0f / d_magnitude(ray_direction));
1591 prd.origin_UUID = pixel_index;
1595 prd.hit_periodic_boundary =
false;
1598 packPointer(&prd, p0, p1);
1600 const float t_min = 1e-5f;
1601 const float t_max = 1e30f;
1603 float3 cur_origin = ray_origin;
1604 for (
int wrap = 0; wrap < 10; wrap++) {
1605 prd.hit_periodic_boundary =
false;
1606 optixTrace(params.traversable, cur_origin, ray_direction,
1608 OptixVisibilityMask(255), OPTIX_RAY_FLAG_NONE,
1611 if (!prd.hit_periodic_boundary)
break;
1612 cur_origin = prd.periodic_hit;