1 /* Copyright Jukka Jylänki
2
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7        http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License. */
14
15 /** @file TriangleMesh.cpp
16         @author Jukka Jylänki
17         @brief Implementation for the TriangleMesh geometry object. */
18 #include "TriangleMesh.h"
19 #include <stdlib.h>
20 #include <string.h>
21 #include "../Math/float3.h"
22 #include "Triangle.h"
23 #include "Ray.h"
24 #include "Polyhedron.h"
25 #include "../MathGeoLibFwd.h"
26 #include "../Math/MathConstants.h"
27 #include "../Math/myassert.h"
28 #include "../../tests/SystemInfo.h"
29
30 #include <vector>
31
32 #include "../Math/SSEMath.h"
33
34 // If defined, we preprocess our TriangleMesh data structure to contain (v0, v1-v0, v2-v0)
35 // instead of (v0, v1, v2) triplets for faster ray-triangle mesh intersection.
36 #define SOA_HAS_EDGES
37
38 MATH_BEGIN_NAMESPACE
39
/// Enumerates the SIMD instruction set levels that the runtime dispatch in this file distinguishes.
/** The numeric values are spelled out explicitly; they match the values the compiler would assign
        implicitly. There are no enumerators for SSE3, SSSE3, SSE4 or SSE4.2. */
enum SIMDCapability
{
        SIMD_NONE  = 0, ///< Fallback value returned when no other level was detected.
        SIMD_SSE   = 1,
        SIMD_SSE2  = 2,
        SIMD_SSE41 = 3,
        SIMD_AVX   = 4
};
52
53 SIMDCapability DetectSIMDCapability()
54 {
55 #ifdef WIN32 ///\todo SIMD detection for other x86 platforms.
56
57 #ifdef MATH_SSE
58         int CPUInfo[4] = {-1};
59
60         unsigned    nIds;//, nExIds, i;
61         int nFeatureInfo = 0;
62 //      bool    bSSE3Instructions = false;
63 //      bool    bSupplementalSSE3 = false;
64 //      bool    bCMPXCHG16B = false;
65 #ifdef MATH_SSE41
66         bool    bSSE41Extensions = false;
67 #endif
68 //      bool    bSSE42Extensions = false;
69 //      bool    bPOPCNT = false;
70
71 //      bool    bLAHF_SAHFAvailable = false;
72 //      bool    bCmpLegacy = false;
73 //      bool    bLZCNT = false;
74 //      bool    bSSE4A = false;
75 //      bool    bMisalignedSSE = false;
76 //      bool    bPREFETCH = false;
77 //      bool    bMMXExtensions = false;
78 //      bool    b3DNowExt = false;
79 //      bool    b3DNow = false;
80 //      bool    bFP128 = false;
81 #ifdef MATH_AVX
82         bool    hasAVX = false;
83 #endif
84 //      bool    bMOVOptimization = false;
85
86         CpuId(CPUInfo, 0);
87         nIds = CPUInfo[0];
88
89         // Get the information associated with each valid Id
90 //      for (i=0; i<=nIds; ++i)
91         if (nIds >= 1)
92         {
93         //      __cpuid(CPUInfo, i);
94
95                 CpuId(CPUInfo, 1);
96                 // Interpret CPU feature information.
97 //              if  (i == 1)
98                 {
99 //                      bSSE3Instructions = (CPUInfo[2] & 0x1) || false;
100 //                      bSupplementalSSE3 = (CPUInfo[2] & 0x200) || false;
101 //                      bCMPXCHG16B= (CPUInfo[2] & 0x2000) || false;
102 //                      bSSE41Extensions = (CPUInfo[2] & 0x80000) || false;
103 //                      bSSE42Extensions = (CPUInfo[2] & 0x100000) || false;
104 //                      bPOPCNT= (CPUInfo[2] & 0x800000) || false;
105 #ifdef MATH_AVX
106                         hasAVX = (CPUInfo[2] & 0x10000000) || false;
107 #endif
108                         nFeatureInfo = CPUInfo[3];
109                 }
110         }
111
112 //      const bool hasMMX = (nFeatureInfo & (1 << 23)) != 0;
113
114         // Calling __cpuid with 0x80000000 as the InfoType argument
115         // gets the number of valid extended IDs.
116 //      __cpuid(CPUInfo, 0x80000000);
117 //      nExIds = CPUInfo[0];
118 /*
119         // Get the information associated with each extended ID.
120         for (i=0x80000000; i<=nExIds; ++i)
121         {
122                 __cpuid(CPUInfo, i);
123
124                 if  (i == 0x80000001)
125                 {
126                         bLAHF_SAHFAvailable = (CPUInfo[2] & 0x1) || false;
127                         bCmpLegacy = (CPUInfo[2] & 0x2) || false;
128                         bLZCNT = (CPUInfo[2] & 0x20) || false;
129                         bSSE4A = (CPUInfo[2] & 0x40) || false;
130                         bMisalignedSSE = (CPUInfo[2] & 0x80) || false;
131                         bPREFETCH = (CPUInfo[2] & 0x100) || false;
132                         bMMXExtensions = (CPUInfo[3] & 0x40000) || false;
133                         b3DNowExt = (CPUInfo[3] & 0x40000000) || false;
134                         b3DNow = (CPUInfo[3] & 0x80000000) || false;
135                 }
136
137                 if  (i == 0x8000001A)
138                 {
139                         bFP128 = (CPUInfo[0] & 0x1) || false;
140                         bMOVOptimization = (CPUInfo[0] & 0x2) || false;
141                 }
142         }
143 */
144 #ifdef MATH_AVX
145         if (hasAVX)
146                 return SIMD_AVX;
147 #endif
148 #ifdef MATH_SSE41
149         if (bSSE41Extensions)
150                 return SIMD_SSE41;
151 #endif
152 #ifdef MATH_SSE2
153         const bool hasSSE2 = (nFeatureInfo & (1 << 26)) != 0;
154         if (hasSSE2)
155                 return SIMD_SSE2;
156 #endif
157 #ifdef MATH_SSE
158         const bool hasSSE = (nFeatureInfo & (1 << 25)) != 0;
159         if (hasSSE)
160                 return SIMD_SSE;
161 #endif
162
163 #endif // ~ MATH_SSE not defined.
164 #endif
165         return SIMD_NONE;
166 }
167
/// The result of DetectSIMDCapability(), evaluated once during static initialization of this file.
/// The Set() and IntersectRay*() dispatchers below compare against this value.
const int simdCapability = DetectSIMDCapability();
169
170 TriangleMesh::TriangleMesh()
171 :data(0), numTriangles(0), vertexSizeBytes(0)
172 #ifdef _DEBUG
173 , vertexDataLayout(0)
174 #endif
175 {
176
177 }
178
179 TriangleMesh::~TriangleMesh()
180 {
181         AlignedFree(data);
182 }
183
184 TriangleMesh::TriangleMesh(const TriangleMesh &rhs)
185 :data(0), numTriangles(0), vertexSizeBytes(0)
186 #ifdef _DEBUG
187 , vertexDataLayout(0)
188 #endif
189 {
190         *this = rhs;
191 }
192
193 TriangleMesh &TriangleMesh::operator =(const TriangleMesh &rhs)
194 {
195         if (this == &rhs)
196                 return *this;
197
198 #ifdef _DEBUG
199         vertexDataLayout = rhs.vertexDataLayout;
200 #endif
201         ReallocVertexBuffer(rhs.numTriangles, rhs.vertexSizeBytes);
202         memcpy(data, rhs.data, numTriangles*3*vertexSizeBytes);
203
204         return *this;
205 }
206
207 void TriangleMesh::Set(const Polyhedron &polyhedron)
208 {
209         TriangleArray tris = polyhedron.Triangulate();
210         if (!tris.empty())
211         {
212                 int alignment = (simdCapability == SIMD_AVX) ? 8 : ((simdCapability == SIMD_SSE41 || simdCapability == SIMD_SSE2) ? 4 : 1);
213                 vec degen = POINT_VEC_SCALAR(-FLOAT_INF);
214                 Triangle degent(degen, degen, degen);
215                 while(tris.size() % alignment != 0)
216                         tris.push_back(degent);
217                 Set((Triangle*)&tris[0], (int)tris.size());
218         }
219 }
220
221 void TriangleMesh::Set(const float *triangleMesh, int numTriangles, int vertexSizeBytes)
222 {
223 #ifndef MATH_AUTOMATIC_SSE // TODO: Restore support for this when MATH_AUTOMATIC_SSE is defined!
224         if (simdCapability == SIMD_AVX)
225                 SetSoA8(triangleMesh, numTriangles, vertexSizeBytes);
226         else if (simdCapability == SIMD_SSE41 || simdCapability == SIMD_SSE2)
227                 SetSoA4(triangleMesh, numTriangles, vertexSizeBytes);
228         else
229 #endif
230                 SetAoS(triangleMesh, numTriangles, vertexSizeBytes);
231 }
232
233 float TriangleMesh::IntersectRay(const Ray &ray) const
234 {
235 #ifndef MATH_AUTOMATIC_SSE // TODO: Restore support for this when MATH_AUTOMATIC_SSE is defined!
236 #ifdef MATH_AVX
237         if (simdCapability == SIMD_AVX)
238                 return IntersectRay_AVX(ray);
239 #endif
240 #ifdef MATH_SSE41
241         if (simdCapability == SIMD_SSE41)
242                 return IntersectRay_SSE41(ray);
243 #endif
244 #ifdef MATH_SSE2
245         if (simdCapability == SIMD_SSE2)
246                 return IntersectRay_SSE2(ray);
247 #endif
248 #endif
249         int triangleIndex;
250         float u, v;
251         return IntersectRay_TriangleIndex_UV_CPP(ray, triangleIndex, u, v);
252 }
253
254 float TriangleMesh::IntersectRay_TriangleIndex(const Ray &ray, int &outTriangleIndex) const
255 {
256 #ifndef MATH_AUTOMATIC_SSE // TODO: Restore support for this when MATH_AUTOMATIC_SSE is defined!
257 #ifdef MATH_AVX
258         if (simdCapability == SIMD_AVX)
259                 return IntersectRay_TriangleIndex_AVX(ray, outTriangleIndex);
260 #endif
261 #ifdef MATH_SSE41
262         if (simdCapability == SIMD_SSE41)
263                 return IntersectRay_TriangleIndex_SSE41(ray, outTriangleIndex);
264 #endif
265 #ifdef MATH_SSE2
266         if (simdCapability == SIMD_SSE2)
267                 return IntersectRay_TriangleIndex_SSE2(ray, outTriangleIndex);
268 #endif
269 #endif
270         float u, v;
271         return IntersectRay_TriangleIndex_UV_CPP(ray, outTriangleIndex, u, v);
272 }
273
274 float TriangleMesh::IntersectRay_TriangleIndex_UV(const Ray &ray, int &outTriangleIndex, float &outU, float &outV) const
275 {
276 #ifndef MATH_AUTOMATIC_SSE // TODO: Restore support for this when MATH_AUTOMATIC_SSE is defined!
277 #ifdef MATH_AVX
278         if (simdCapability == SIMD_AVX)
279                 return IntersectRay_TriangleIndex_UV_AVX(ray, outTriangleIndex, outU, outV);
280 #endif
281 #ifdef MATH_SSE41
282         if (simdCapability == SIMD_SSE41)
283                 return IntersectRay_TriangleIndex_UV_SSE41(ray, outTriangleIndex, outU, outV);
284 #endif
285 #ifdef MATH_SSE2
286         if (simdCapability == SIMD_SSE2)
287                 return IntersectRay_TriangleIndex_UV_SSE2(ray, outTriangleIndex, outU, outV);
288 #endif
289 #endif
290
291         return IntersectRay_TriangleIndex_UV_CPP(ray, outTriangleIndex, outU, outV);
292 }
293
294 void TriangleMesh::ReallocVertexBuffer(int numTris, int vertexSizeBytes_)
295 {
296         AlignedFree(data);
297         vertexSizeBytes = vertexSizeBytes_;
298         data = (float*)AlignedMalloc(numTris * 3 * vertexSizeBytes, 32);
299         numTriangles = numTris;
300 }
301
302 void TriangleMesh::SetAoS(const float *vertexData, int numTriangles, int vertexSizeBytes)
303 {
304         ReallocVertexBuffer(numTriangles, vertexSizeBytes);
305 #ifdef _DEBUG
306         vertexDataLayout = 0; // AoS
307 #endif
308
309         memcpy(data, vertexData, numTriangles * 3 * vertexSizeBytes);
310 }
311
312 void TriangleMesh::SetSoA4(const float *vertexData, int numTriangles, int vertexSizeBytes)
313 {
314         ReallocVertexBuffer(numTriangles, 3*sizeof(float));
315 #ifdef _DEBUG
316         vertexDataLayout = 1; // SoA4
317 #endif
318
319         assert(vertexSizeBytes % 4 == 0);
320         int vertexSizeFloats = vertexSizeBytes / 4;
321         int triangleSizeFloats = vertexSizeFloats * 3;
322         assert(numTriangles % 4 == 0); // We must have an evenly divisible amount of triangles, so that the SoA swizzling succeeds.
323
324         // From (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz)
325         // To xxxx yyyy zzzz xxxx yyyy zzzz xxxx yyyy zzzz
326
327         float *o = data;
328         for(int i = 0; i + 4 <= numTriangles; i += 4) // 4 triangles at a time
329         {
330                 for (int j = 0; j < 3; ++j) // v0,v1,v2
331                 {
332                         const float *src = vertexData;
333                         for (int k = 0; k < 3; ++k) // x,y,z
334                         {
335                                 *o++ = src[0];
336                                 *o++ = src[triangleSizeFloats];
337                                 *o++ = src[2 * triangleSizeFloats];
338                                 *o++ = src[3 * triangleSizeFloats];
339                                 ++src;
340                         }
341                         vertexData += vertexSizeFloats;
342                 }
343                 vertexData += 3 * triangleSizeFloats;
344         }
345
346 #ifdef SOA_HAS_EDGES
347         o = data;
348         for(int i = 0; i + 4 <= numTriangles; i += 4)
349         {
350                 for(int j = 12; j < 24; ++j)
351                         o[j] -= o[j-12];
352                 for(int j = 24; j < 36; ++j)
353                         o[j] -= o[j-24];
354                 o += 36;
355         }
356 #endif
357 }
358
359 void TriangleMesh::SetSoA8(const float *vertexData, int numTriangles, int vertexSizeBytes)
360 {
361         ReallocVertexBuffer(numTriangles, 3*sizeof(float));
362 #ifdef _DEBUG
363         vertexDataLayout = 2; // SoA8
364 #endif
365
366         assert(vertexSizeBytes % 4 == 0);
367         int vertexSizeFloats = vertexSizeBytes / 4;
368         int triangleSizeFloats = vertexSizeFloats * 3;
369         assert(numTriangles % 8 == 0); // We must have an evenly divisible amount of triangles, so that the SoA swizzling succeeds.
370
371         // From (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz) (xyz xyz xyz)
372         // To xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy zzzzzzzz
373
374         float *o = data;
375         for(int i = 0; i + 8 <= numTriangles; i += 8) // 8 triangles at a time.
376         {
377                 for (int j = 0; j < 3; ++j) // v0, v1, v2
378                 {
379                         const float *src = vertexData;
380                         for (int k = 0; k < 3; ++k) // x,y,z
381                         {
382                                 *o++ = src[0];
383                                 *o++ = src[triangleSizeFloats];
384                                 *o++ = src[2 * triangleSizeFloats];
385                                 *o++ = src[3 * triangleSizeFloats];
386                                 *o++ = src[4 * triangleSizeFloats];
387                                 *o++ = src[5 * triangleSizeFloats];
388                                 *o++ = src[6 * triangleSizeFloats];
389                                 *o++ = src[7 * triangleSizeFloats];
390                                 ++src;
391                         }
392                         vertexData += vertexSizeFloats;
393                 }
394                 vertexData += 7 * triangleSizeFloats;
395         }
396
397 #ifdef SOA_HAS_EDGES
398         o = data;
399         for(int i = 0; i + 8 <= numTriangles; i += 8)
400         {
401                 for(int j = 24; j < 48; ++j)
402                         o[j] -= o[j-24];
403                 for(int j = 48; j < 72; ++j)
404                         o[j] -= o[j-48];
405                 o += 72;
406         }
407 #endif
408 }
409
410 float TriangleMesh::IntersectRay_TriangleIndex_UV_CPP(const Ray &ray, int &outTriangleIndex, float &outU, float &outV) const
411 {
412         assert(sizeof(float3) == 3*sizeof(float));
413         assert(sizeof(Triangle) == 3*sizeof(vec));
414 #ifdef _DEBUG
415         assert(vertexDataLayout == 0); // Must be AoS structured!
416 #endif
417
418         float nearestD = FLOAT_INF;
419
420         const Triangle *tris = reinterpret_cast<const Triangle*>(data);
421         for(int i = 0; i < numTriangles; ++i)
422         {
423                 float u, v;
424                 float d = Triangle::IntersectLineTri(ray.pos, ray.dir, tris->a, tris->b, tris->c, u, v);
425                 if (d >= 0.f && d < nearestD)
426                 {
427                         nearestD = d;
428                         outU = u;
429                         outV = v;
430                         outTriangleIndex = i;
431                 }
432                 ++tris;
433         }
434
435         return nearestD;
436 }
437
438 MATH_END_NAMESPACE
439
// The SIMD intersection routines are generated by including the same .inl file several times with
// different MATH_GEN_* macros defined. For each instruction set there are three inclusions:
// plain, with MATH_GEN_TRIANGLEINDEX, and with MATH_GEN_TRIANGLEINDEX + MATH_GEN_UV.
// NOTE(review): presumably these produce the IntersectRay_*, IntersectRay_TriangleIndex_* and
// IntersectRay_TriangleIndex_UV_* functions called by the dispatchers above, and the .inl file
// #undefs the MATH_GEN_* macros at its end (they are re-defined before every inclusion) - confirm in the .inl files.

// SSE2 variants.
#ifdef MATH_SSE2
#define MATH_GEN_SSE2
#include "TriangleMesh_IntersectRay_SSE.inl"

#define MATH_GEN_SSE2
#define MATH_GEN_TRIANGLEINDEX
#include "TriangleMesh_IntersectRay_SSE.inl"

#define MATH_GEN_SSE2
#define MATH_GEN_TRIANGLEINDEX
#define MATH_GEN_UV
#include "TriangleMesh_IntersectRay_SSE.inl"
#endif

// SSE4.1 variants. These share the .inl file with the SSE2 variants.
#ifdef MATH_SSE41
#define MATH_GEN_SSE41
#include "TriangleMesh_IntersectRay_SSE.inl"

#define MATH_GEN_SSE41
#define MATH_GEN_TRIANGLEINDEX
#include "TriangleMesh_IntersectRay_SSE.inl"

#define MATH_GEN_SSE41
#define MATH_GEN_TRIANGLEINDEX
#define MATH_GEN_UV
#include "TriangleMesh_IntersectRay_SSE.inl"
#endif

// AVX variants. These use their own .inl file.
#ifdef MATH_AVX
#define MATH_GEN_AVX
#include "TriangleMesh_IntersectRay_AVX.inl"

#define MATH_GEN_AVX
#define MATH_GEN_TRIANGLEINDEX
#include "TriangleMesh_IntersectRay_AVX.inl"

#define MATH_GEN_AVX
#define MATH_GEN_TRIANGLEINDEX
#define MATH_GEN_UV
#include "TriangleMesh_IntersectRay_AVX.inl"
#endif

Go back to previous page