1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [MATH_BEGIN_NAMESPACE]19 20 #if defined(MATH_GEN_SSE2) && !defined(MATH_GEN_TRIANGLEINDEX)21 float TriangleMesh::IntersectRay_SSE2(const [Ray] &ray) const22 #elif defined(MATH_GEN_SSE2) && defined(MATH_GEN_TRIANGLEINDEX) && !defined(MATH_GEN_UV)23 float TriangleMesh::IntersectRay_TriangleIndex_SSE2(const [Ray] &ray, int &outTriangleIndex) const24 #elif defined(MATH_GEN_SSE2) && defined(MATH_GEN_TRIANGLEINDEX) && defined(MATH_GEN_UV)25 float TriangleMesh::IntersectRay_TriangleIndex_UV_SSE2(const [Ray] &ray, int &outTriangleIndex, float &outU, float &outV) const26 #elif defined(MATH_GEN_SSE41) && !defined(MATH_GEN_TRIANGLEINDEX)27 float TriangleMesh::IntersectRay_SSE41(const [Ray] &ray) const28 #elif defined(MATH_GEN_SSE41) && defined(MATH_GEN_TRIANGLEINDEX) && !defined(MATH_GEN_UV)29 float TriangleMesh::IntersectRay_TriangleIndex_SSE41(const [Ray] &ray, int &outTriangleIndex) const30 #elif defined(MATH_GEN_SSE41) && defined(MATH_GEN_TRIANGLEINDEX) && defined(MATH_GEN_UV)31 float TriangleMesh::IntersectRay_TriangleIndex_UV_SSE41(const [Ray] &ray, int &outTriangleIndex, float &outU, float &outV) const32 #endif33 {34 35 36 37 [assert](sizeof([float3]) == 3*sizeof(float));38 [assert](sizeof([Triangle]) == 3*sizeof([float3]));39 #ifdef _DEBUG40 [assert](vertexDataLayout == 1); 41 #endif42 43 const float [inf] = [FLOAT_INF];44 __m128 [nearestD] = _mm_set1_ps(inf);45 #ifdef MATH_GEN_UV46 __m128 nearestU = _mm_set1_ps(inf);47 __m128 nearestV = _mm_set1_ps(inf);48 #endif49 #ifdef MATH_GEN_TRIANGLEINDEX50 __m128i nearestIndex = _mm_set1_epi32(-1);51 #endif52 53 const __m128 [lX] = _mm_load1_ps(&ray.pos.x);54 const __m128 [lY] = _mm_load1_ps(&ray.pos.y);55 const __m128 [lZ] = _mm_load1_ps(&ray.pos.z);56 57 const __m128 [dX] = _mm_load1_ps(&ray.dir.x);58 const __m128 [dY] = _mm_load1_ps(&ray.dir.y);59 const __m128 [dZ] = _mm_load1_ps(&ray.dir.z);60 61 const __m128 [epsilon] = _mm_set1_ps(1[e]-4f);62 const __m128 [zero] = _mm_setzero_ps();63 const __m128 [one] = _mm_set1_ps(1.f);64 65 const __m128 [sign_mask] = _mm_set1_ps(-0.f); 66 67 [assert](((uintptr_t)data & 0xF) == 0);68 69 const float *[tris] = reinterpret_cast<const float*>(data);70 71 for(int i = 0; i+4 <= numTriangles; i += 4)72 {73 __m128 v0x = _mm_load_ps(tris);74 __m128 v0y = _mm_load_ps(tris+4);75 __m128 v0z = _mm_load_ps(tris+8);76 77 #ifdef SOA_HAS_EDGES78 __m128 e1x = _mm_load_ps(tris+12);79 __m128 e1y = _mm_load_ps(tris+16);80 __m128 e1z = _mm_load_ps(tris+20);81 82 __m128 e2x = _mm_load_ps(tris+24);83 __m128 e2y = _mm_load_ps(tris+28);84 __m128 e2z = _mm_load_ps(tris+32);85 #else86 __m128 v1x = _mm_load_ps(tris+12);87 __m128 v1y = _mm_load_ps(tris+16);88 __m128 v1z = _mm_load_ps(tris+20);89 90 __m128 v2x = _mm_load_ps(tris+24);91 __m128 v2y = _mm_load_ps(tris+28);92 __m128 v2z = _mm_load_ps(tris+32);93 94 95 __m128 e1x = _mm_sub_ps(v1x, v0x);96 __m128 e1y = _mm_sub_ps(v1y, v0y);97 __m128 e1z = _mm_sub_ps(v1z, v0z);98 99 __m128 e2x = _mm_sub_ps(v2x, v0x);100 __m128 e2y = _mm_sub_ps(v2y, v0y);101 __m128 e2z = _mm_sub_ps(v2z, v0z);102 #endif103 104 105 106 __m128 px = _mm_sub_ps(_mm_mul_ps(dY, e2z), _mm_mul_ps(dZ, e2y));107 __m128 py = _mm_sub_ps(_mm_mul_ps(dZ, e2x), _mm_mul_ps(dX, e2z));108 __m128 pz = _mm_sub_ps(_mm_mul_ps(dX, e2y), _mm_mul_ps(dY, e2x));109 110 111 __m128 det = _mm_add_ps(_mm_add_ps(_mm_mul_ps(e1x, px), _mm_mul_ps(e1y, py)), _mm_mul_ps(e1z, pz));112 113 114 115 116 117 __m128 recipDet = _mm_rcp_ps(det);118 119 __m128 absdet = _mm_andnot_ps(sign_mask, det);120 __m128 out = _mm_cmple_ps(absdet, epsilon);121 122 123 __m128 tx = _mm_sub_ps(lX, v0x);124 __m128 ty = _mm_sub_ps(lY, v0y);125 __m128 tz = _mm_sub_ps(lZ, v0z);126 127 128 __m128 u = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, px), _mm_mul_ps(ty, py)), _mm_mul_ps(tz, pz)), recipDet);129 130 131 132 __m128 out2 = _mm_cmplt_ps(u, zero);133 out = _mm_or_ps(out, out2);134 out2 = _mm_cmpgt_ps(u, one);135 out = _mm_or_ps(out, out2);136 137 138 __m128 qx = _mm_sub_ps(_mm_mul_ps(ty, e1z), _mm_mul_ps(tz, e1y));139 __m128 qy = _mm_sub_ps(_mm_mul_ps(tz, e1x), _mm_mul_ps(tx, e1z));140 __m128 qz = _mm_sub_ps(_mm_mul_ps(tx, e1y), _mm_mul_ps(ty, e1x));141 142 143 __m128 v = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(dX, qx), _mm_mul_ps(dY, qy)), _mm_mul_ps(dZ, qz)), recipDet);144 145 146 147 out2 = _mm_cmplt_ps(v, zero);148 out = _mm_or_ps(out, out2);149 __m128 uv = _mm_add_ps(u, v);150 out2 = _mm_cmpgt_ps(uv, one);151 out = _mm_or_ps(out, out2);152 153 154 __m128 t = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(e2x, qx), _mm_mul_ps(e2y, qy)), _mm_mul_ps(e2z, qz)), recipDet);155 156 157 out2 = _mm_cmplt_ps(t, zero);158 out = _mm_or_ps(out, out2);159 160 161 out2 = _mm_cmpge_ps(t, nearestD);162 out = _mm_or_ps(out, out2);163 164 165 166 167 #ifdef MATH_GEN_SSE41168 nearestD = _mm_blendv_ps(t, nearestD, out);169 #else170 171 nearestD = _mm_and_ps(out, nearestD);172 t = _mm_andnot_ps(out, t);173 nearestD = _mm_or_ps(t, nearestD);174 #endif175 176 #ifdef MATH_GEN_UV177 178 #ifdef MATH_GEN_SSE41179 nearestU = _mm_blendv_ps(u, nearestU, out); 180 nearestV = _mm_blendv_ps(v, nearestV, out); 181 #else182 183 nearestU = _mm_and_ps(out, nearestU);184 nearestV = _mm_and_ps(out, nearestV);185 u = _mm_andnot_ps(out, u);186 v = _mm_andnot_ps(out, v);187 nearestU = _mm_or_ps(u, nearestU);188 nearestV = _mm_or_ps(v, nearestV);189 #endif190 191 #endif192 193 #ifdef MATH_GEN_TRIANGLEINDEX194 __m128i hitIndex = _mm_set1_epi32(i);195 #ifdef MATH_GEN_SSE41196 nearestIndex = _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(hitIndex), _mm_castsi128_ps(nearestIndex), out)); 197 #else198 199 200 nearestIndex = _mm_and_si128(_mm_castps_si128(out), nearestIndex);201 hitIndex = _mm_andnot_si128(_mm_castps_si128(out), hitIndex);202 nearestIndex = _mm_or_si128(hitIndex, nearestIndex);203 #endif204 205 #endif206 207 tris += 36;208 }209 210 [float4] [d] = [nearestD];211 #ifdef MATH_GEN_UV212 [float4] u = nearestU;213 [float4] v = nearestV;214 #endif215 #ifdef MATH_GEN_TRIANGLEINDEX216 u32 idx[4];217 _mm_store_si128((__m128i*)idx, nearestIndex);218 #endif219 float [smallestT] = [FLOAT_INF];220 for(int i = 0; i < 4; ++i)221 if (d[i] < smallestT)222 {223 smallestT = d[i];224 #ifdef MATH_GEN_TRIANGLEINDEX225 outTriangleIndex = idx[i]+i;226 #endif227 #ifdef MATH_GEN_UV228 outU = u[i];229 outV = v[i];230 #endif231 }232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 return [smallestT];248 }249 250 251 252 253 254 255 256 257 258 #ifdef MATH_GEN_SSE2259 #undef MATH_GEN_SSE2260 #endif261 #ifdef MATH_GEN_SSE41262 #undef MATH_GEN_SSE41263 #endif264 #ifdef MATH_GEN_TRIANGLEINDEX265 #undef MATH_GEN_TRIANGLEINDEX266 #endif267 #ifdef MATH_GEN_UV268 #undef MATH_GEN_UV269 #endif270 271 [MATH_END_NAMESPACE] Go back to previous page