1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 #include "../Math/SSEMath.h"20 21 [MATH_BEGIN_NAMESPACE]22 23 #if !defined(MATH_GEN_TRIANGLEINDEX)24 float TriangleMesh::IntersectRay_AVX(const [Ray] &ray) const25 #elif defined(MATH_GEN_TRIANGLEINDEX) && !defined(MATH_GEN_UV)26 float TriangleMesh::IntersectRay_TriangleIndex_AVX(const [Ray] &ray, int &outTriangleIndex) const27 #elif defined(MATH_GEN_TRIANGLEINDEX) && defined(MATH_GEN_UV)28 float TriangleMesh::IntersectRay_TriangleIndex_UV_AVX(const [Ray] &ray, int &outTriangleIndex, float &outU, float &outV) const29 #endif30 {31 32 33 34 [assert](sizeof([float3]) == 3*sizeof(float));35 [assert](sizeof([Triangle]) == 3*sizeof(vec));36 #ifdef _DEBUG37 [assert](vertexDataLayout == 2); 38 #endif39 40 41 42 const float [inf] = [FLOAT_INF];43 __m256 [nearestD] = _mm256_set1_ps(inf);44 #ifdef MATH_GEN_UV45 __m256 nearestU = _mm256_set1_ps(inf);46 __m256 nearestV = _mm256_set1_ps(inf);47 #endif48 #ifdef MATH_GEN_TRIANGLEINDEX49 __m256i nearestIndex = _mm256_set1_epi32(-1);50 #endif51 52 const __m256 [lX] = _mm256_broadcast_ss(&ray.pos.x);53 const __m256 [lY] = _mm256_broadcast_ss(&ray.pos.y);54 const __m256 [lZ] = _mm256_broadcast_ss(&ray.pos.z);55 56 const __m256 [dX] = _mm256_broadcast_ss(&ray.dir.x);57 const __m256 [dY] = _mm256_broadcast_ss(&ray.dir.y);58 const __m256 [dZ] = _mm256_broadcast_ss(&ray.dir.z);59 60 const __m256 [epsilon] = _mm256_set1_ps(1[e]-4f);61 const __m256 [zero] = _mm256_setzero_ps();62 const __m256 [one] = _mm256_set1_ps(1.f);63 64 [assert](((uintptr_t)data & 0x1F) == 0);65 66 const float *[tris] = reinterpret_cast<const float*>(data);67 68 for(int i = 0; i+8 <= numTriangles; i += 8)69 {70 __m256 v0x = _mm256_load_ps(tris);71 __m256 v0y = _mm256_load_ps(tris+8);72 __m256 v0z = _mm256_load_ps(tris+16);73 74 #ifdef SOA_HAS_EDGES75 76 __m256 e1x = _mm256_load_ps(tris+24);77 __m256 e1y = _mm256_load_ps(tris+32);78 __m256 e1z = _mm256_load_ps(tris+40);79 80 __m256 e2x = _mm256_load_ps(tris+48);81 __m256 e2y = _mm256_load_ps(tris+56);82 __m256 e2z = _mm256_load_ps(tris+64);83 #else84 __m256 v1x = _mm256_load_ps(tris+24);85 __m256 v1y = _mm256_load_ps(tris+32);86 __m256 v1z = _mm256_load_ps(tris+40);87 88 __m256 v2x = _mm256_load_ps(tris+48);89 __m256 v2y = _mm256_load_ps(tris+56);90 __m256 v2z = _mm256_load_ps(tris+64);91 92 93 __m256 e1x = _mm256_sub_ps(v1x, v0x);94 __m256 e1y = _mm256_sub_ps(v1y, v0y);95 __m256 e1z = _mm256_sub_ps(v1z, v0z);96 97 __m256 e2x = _mm256_sub_ps(v2x, v0x);98 __m256 e2y = _mm256_sub_ps(v2y, v0y);99 __m256 e2z = _mm256_sub_ps(v2z, v0z);100 #endif101 102 __m256 px = _mm256_sub_ps(_mm256_mul_ps(dY, e2z), _mm256_mul_ps(dZ, e2y));103 __m256 py = _mm256_sub_ps(_mm256_mul_ps(dZ, e2x), _mm256_mul_ps(dX, e2z));104 __m256 pz = _mm256_sub_ps(_mm256_mul_ps(dX, e2y), _mm256_mul_ps(dY, e2x));105 106 107 __m256 det = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(e1x, px), _mm256_mul_ps(e1y, py)), _mm256_mul_ps(e1z, pz));108 109 110 111 112 113 __m256 recipDet = _mm256_rcp_ps(det);114 115 __m256 absdet = abs_ps256(det);116 __m256 out = _mm256_cmp_ps(absdet, epsilon, _CMP_LT_OQ);117 118 119 __m256 tx = _mm256_sub_ps(lX, v0x);120 __m256 ty = _mm256_sub_ps(lY, v0y);121 __m256 tz = _mm256_sub_ps(lZ, v0z);122 123 124 __m256 u = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(tx, px), _mm256_mul_ps(ty, py)), _mm256_mul_ps(tz, pz)), recipDet);125 126 127 128 __m256 out2 = _mm256_cmp_ps(u, zero, 1);129 out = _mm256_or_ps(out, out2);130 out2 = _mm256_cmp_ps(u, one, _CMP_GT_OQ);131 out = _mm256_or_ps(out, out2);132 133 134 __m256 qx = _mm256_sub_ps(_mm256_mul_ps(ty, e1z), _mm256_mul_ps(tz, e1y));135 __m256 qy = _mm256_sub_ps(_mm256_mul_ps(tz, e1x), _mm256_mul_ps(tx, e1z));136 __m256 qz = _mm256_sub_ps(_mm256_mul_ps(tx, e1y), _mm256_mul_ps(ty, e1x));137 138 139 __m256 v = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(dX, qx), _mm256_mul_ps(dY, qy)), _mm256_mul_ps(dZ, qz)), recipDet);140 141 142 143 out2 = _mm256_cmp_ps(v, zero, _CMP_LT_OQ);144 out = _mm256_or_ps(out, out2);145 __m256 uv = _mm256_add_ps(u, v);146 out2 = _mm256_cmp_ps(uv, one, _CMP_GT_OQ);147 out = _mm256_or_ps(out, out2);148 149 150 __m256 t = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(e2x, qx), _mm256_mul_ps(e2y, qy)), _mm256_mul_ps(e2z, qz)), recipDet);151 152 153 out2 = _mm256_cmp_ps(t, zero, _CMP_LT_OQ);154 out = _mm256_or_ps(out, out2);155 156 157 out2 = _mm256_cmp_ps(t, nearestD, _CMP_GE_OQ);158 out = _mm256_or_ps(out, out2);159 160 161 #ifdef MATH_GEN_TRIANGLEINDEX162 __m256i hitIndex = _mm256_set1_epi32(i);163 nearestIndex = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hitIndex), _mm256_castsi256_ps(nearestIndex), out)); 164 #endif165 166 #ifdef MATH_GEN_UV167 nearestU = _mm256_blendv_ps(u, nearestU, out);168 nearestV = _mm256_blendv_ps(v, nearestV, out);169 #endif170 171 172 173 nearestD = _mm256_blendv_ps(t, nearestD, out);174 175 tris += 72;176 }177 178 float ds[8];179 _mm256_store_ps(ds, nearestD);180 #ifdef MATH_GEN_UV181 float su[8];182 float sv[8];183 _mm256_storeu_ps(su, nearestU);184 _mm256_storeu_ps(sv, nearestV);185 #endif186 187 #ifdef MATH_GEN_TRIANGLEINDEX188 u32 ds2[8];189 _mm256_storeu_si256((__m256i*)ds2, nearestIndex);190 #endif191 192 float [smallestT] = [FLOAT_INF];193 194 for(int i = 0; i < 8; ++i)195 if (ds[i] < smallestT)196 {197 smallestT = ds[i];198 #ifdef MATH_GEN_TRIANGLEINDEX199 outTriangleIndex = ds2[i]+i;200 #endif201 #ifdef MATH_GEN_UV202 outU = su[i];203 outV = sv[i];204 #endif205 }206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 return [smallestT];222 }223 224 225 #ifdef MATH_GEN_TRIANGLEINDEX226 #undef MATH_GEN_TRIANGLEINDEX227 #endif228 #ifdef MATH_GEN_UV229 #undef MATH_GEN_UV230 #endif231 232 [MATH_END_NAMESPACE] Go back to previous page