float4x4.cpp

1 /* Copyright Jukka Jylï¿½nki
2
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7        http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License. */
14
15 /** @file float4x4.cpp
16         @author Jukka Jylï¿½nki
17         @brief */
18 #include "float4x4.h"
19 #include <string.h>
20
21 #include "MathFunc.h"
22 #include "float3.h"
23 #include "float4.h"
24 #include "float3x3.h"
25 #include "float3x4.h"
26 #include "Matrix.inl"
27 #include "Quat.h"
28 #include "TransformOps.h"
29 #include "../Geometry/Plane.h"
30 #include "../Algorithm/Random/LCG.h"
31 #include "SSEMath.h"
32 #include "float4x4_sse.h"
33 #include "float4x4_neon.h"
34 #include "quat_simd.h"
35
36 #ifdef MATH_ENABLE_STL_SUPPORT
37 #include <iostream>
38 #endif
39
40 MATH_BEGIN_NAMESPACE
41
42 float4x4::float4x4(float _00, float _01, float _02, float _03,
43                    float _10, float _11, float _12, float _13,
44                    float _20, float _21, float _22, float _23,
45                    float _30, float _31, float _32, float _33)
46 {
47         Set(_00, _01, _02, _03,
48                 _10, _11, _12, _13,
49                 _20, _21, _22, _23,
50                 _30, _31, _32, _33);
51 }
52
53 float4x4::float4x4(const float3x3 &m)
54 {
55         Set(m.At(0,0), m.At(0,1), m.At(0,2), 0.f,
56                 m.At(1,0), m.At(1,1), m.At(1,2), 0.f,
57                 m.At(2,0), m.At(2,1), m.At(2,2), 0.f,
58                       0.f,       0.f,       0.f, 1.f);
59 }
60
61 float4x4::float4x4(const float3x4 &m)
62 {
63 #ifdef MATH_AUTOMATIC_SSE
64         row[0] = m.row[0];
65         row[1] = m.row[1];
66         row[2] = m.row[2];
67         row[3] = set_ps(1.f, 0.f, 0.f, 0.f);
68 #else
69         Set(m.At(0,0), m.At(0,1), m.At(0,2), m.At(0,3),
70                 m.At(1,0), m.At(1,1), m.At(1,2), m.At(1,3),
71                 m.At(2,0), m.At(2,1), m.At(2,2), m.At(2,3),
72                       0.f,       0.f,       0.f,     1.f);
73 #endif
74 }
75
76 float4x4::float4x4(const float4 &col0, const float4 &col1, const float4 &col2, const float4 &col3)
77 {
78 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
79         __m128 tmp0 = _mm_unpacklo_ps(col0.v, col1.v);
80         __m128 tmp2 = _mm_unpacklo_ps(col2.v, col3.v);
81         __m128 tmp1 = _mm_unpackhi_ps(col0.v, col1.v);
82         __m128 tmp3 = _mm_unpackhi_ps(col2.v, col3.v);
83         row[0] = _mm_movelh_ps(tmp0, tmp2);
84         row[1] = _mm_movehl_ps(tmp2, tmp0);
85         row[2] = _mm_movelh_ps(tmp1, tmp3);
86         row[3] = _mm_movehl_ps(tmp3, tmp1);
87 #else
88         SetCol(0, col0);
89         SetCol(1, col1);
90         SetCol(2, col2);
91         SetCol(3, col3);
92 #endif
93 }
94
95 float4x4::float4x4(const Quat &orientation)
96 {
97 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
98         quat_to_mat4x4(orientation.q, _mm_set_ps(1, 0, 0, 0), row);
99 #else
100         SetRotatePart(orientation);
101         SetRow(3, 0, 0, 0, 1);
102         SetCol3(3, 0, 0, 0);
103 #endif
104 }
105
106 float4x4::float4x4(const Quat &orientation, const float3 &translation)
107 {
108 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
109         quat_to_mat4x4(orientation.q, float4(translation, 1.f), row);
110 #else
111         SetRotatePart(orientation);
112         SetTranslatePart(translation);
113         SetRow(3, 0, 0, 0, 1);
114 #endif
115 }
116
117 TranslateOp float4x4::Translate(float tx, float ty, float tz)
118 {
119         return TranslateOp(tx, ty, tz);
120 }
121
122 TranslateOp float4x4::Translate(const float3 &offset)
123 {
124         return TranslateOp(offset);
125 }
126
127 float4x4 float4x4::RotateX(float angleRadians)
128 {
129         float4x4 r;
130         r.SetRotatePartX(angleRadians);
131         r.SetRow(3, 0, 0, 0, 1);
132         r.SetCol3(3, 0, 0, 0);
133         return r;
134 }
135
136 float4x4 float4x4::RotateX(float angleRadians, const float3 &pointOnAxis)
137 {
138         return float4x4::Translate(pointOnAxis) * RotateX(angleRadians) * float4x4::Translate(-pointOnAxis);
139 }
140
141 float4x4 float4x4::RotateY(float angleRadians)
142 {
143         float4x4 r;
144         r.SetRotatePartY(angleRadians);
145         r.SetRow(3, 0, 0, 0, 1);
146         r.SetCol3(3, 0, 0, 0);
147         return r;
148 }
149
150 float4x4 float4x4::RotateY(float angleRadians, const float3 &pointOnAxis)
151 {
152         return float4x4::Translate(pointOnAxis) * RotateY(angleRadians) * float4x4::Translate(-pointOnAxis);
153 }
154
155 float4x4 float4x4::RotateZ(float angleRadians)
156 {
157         float4x4 r;
158         r.SetRotatePartZ(angleRadians);
159         r.SetRow(3, 0, 0, 0, 1);
160         r.SetCol3(3, 0, 0, 0);
161         return r;
162 }
163
164 float4x4 float4x4::RotateZ(float angleRadians, const float3 &pointOnAxis)
165 {
166         return float4x4::Translate(pointOnAxis) * RotateZ(angleRadians) * float4x4::Translate(-pointOnAxis);
167 }
168
169 float4x4 float4x4::RotateAxisAngle(const float3 &axisDirection, float angleRadians)
170 {
171         float4x4 r;
172         r.SetRotatePart(axisDirection, angleRadians);
173         r.SetRow(3, 0, 0, 0, 1);
174         r.SetCol3(3, 0, 0, 0);
175         return r;
176 }
177
178 float4x4 float4x4::RotateAxisAngle(const float3 &axisDirection, float angleRadians, const float3 &pointOnAxis)
179 {
180         return float4x4::Translate(pointOnAxis) * RotateAxisAngle(axisDirection, angleRadians) * float4x4::Translate(-pointOnAxis);
181 }
182
183 float4x4 float4x4::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection)
184 {
185         return Quat::RotateFromTo(sourceDirection, targetDirection).ToFloat4x4();
186 }
187
188 float4x4 float4x4::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection, const float3 &centerPoint)
189 {
190         return float4x4::Translate(centerPoint) * RotateFromTo(sourceDirection, targetDirection) * float4x4::Translate(-centerPoint);
191 }
192
193 float4x4 float4x4::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection,
194         const float3 &sourceDirection2, const float3 &targetDirection2)
195 {
196         return LookAt(sourceDirection, targetDirection, sourceDirection2, targetDirection2);
197 }
198
199 float4x4 float4x4::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection,
200         const float3 &sourceDirection2, const float3 &targetDirection2, const float3 &centerPoint)
201 {
202         return float4x4::Translate(centerPoint) * RotateFromTo(sourceDirection, targetDirection, sourceDirection2, targetDirection2) * float4x4::Translate(-centerPoint);
203 }
204
205 float4x4 float4x4::RandomGeneral(LCG &lcg, float minElem, float maxElem)
206 {
207         assume(MATH_NS::IsFinite(minElem));
208         assume(MATH_NS::IsFinite(maxElem));
209         float4x4 m;
210         for(int y = 0; y < 4; ++y)
211                 for(int x = 0; x < 4; ++x)
212                         m[y][x] = lcg.Float(minElem, maxElem);
213         return m;
214 }
215
216 float4x4 float4x4::FromQuat(const Quat &orientation)
217 {
218         return float4x4(orientation);
219 }
220
221 float4x4 float4x4::FromQuat(const Quat &orientation, const float3 &pointOnAxis)
222 {
223         return float4x4::Translate(pointOnAxis) * float4x4(orientation) * float4x4::Translate(-pointOnAxis);
224 }
225
226 float4x4 float4x4::FromTRS(const float3 &translate, const Quat &rotate, const float3 &scale)
227 {
228         return float4x4::Translate(translate) * float4x4(rotate) * float4x4::Scale(scale);
229 }
230
231 float4x4 float4x4::FromTRS(const float3 &translate, const float3x3 &rotate, const float3 &scale)
232 {
233         return float4x4::Translate(translate) * float4x4(rotate) * float4x4::Scale(scale);
234 }
235
236 float4x4 float4x4::FromTRS(const float3 &translate, const float3x4 &rotate, const float3 &scale)
237 {
238         return float4x4::Translate(translate) * float4x4(rotate) * float4x4::Scale(scale);
239 }
240
241 float4x4 float4x4::FromTRS(const float3 &translate, const float4x4 &rotate, const float3 &scale)
242 {
243         return float4x4::Translate(translate) * rotate * float4x4::Scale(scale);
244 }
245
246 float4x4 float4x4::FromEulerXYX(float x2, float y, float x)
247 {
248         float4x4 r;
249         r.SetTranslatePart(0,0,0);
250         r.SetRow(3, 0,0,0,1);
251         Set3x3PartRotateEulerXYX(r, x2, y, x);
252         assume(r.Equals(float4x4::RotateX(x2) * float4x4::RotateY(y) * float4x4::RotateX(x)));
253         return r;
254 }
255
256 float4x4 float4x4::FromEulerXZX(float x2, float z, float x)
257 {
258         float4x4 r;
259         r.SetTranslatePart(0,0,0);
260         r.SetRow(3, 0,0,0,1);
261         Set3x3PartRotateEulerXZX(r, x2, z, x);
262         assume(r.Equals(float4x4::RotateX(x2) * float4x4::RotateZ(z) * float4x4::RotateX(x)));
263         return r;
264 }
265
266 float4x4 float4x4::FromEulerYXY(float y2, float x, float y)
267 {
268         float4x4 r;
269         r.SetTranslatePart(0,0,0);
270         r.SetRow(3, 0,0,0,1);
271         Set3x3PartRotateEulerYXY(r, y2, x, y);
272         assume(r.Equals(float4x4::RotateY(y2) * float4x4::RotateX(x) * float4x4::RotateY(y)));
273         return r;
274 }
275
276 float4x4 float4x4::FromEulerYZY(float y2, float z, float y)
277 {
278         float4x4 r;
279         r.SetTranslatePart(0,0,0);
280         r.SetRow(3, 0,0,0,1);
281         Set3x3PartRotateEulerYZY(r, y2, z, y);
282         assume(r.Equals(float4x4::RotateY(y2) * float4x4::RotateZ(z) * float4x4::RotateY(y)));
283         return r;
284 }
285
286 float4x4 float4x4::FromEulerZXZ(float z2, float x, float z)
287 {
288         float4x4 r;
289         r.SetTranslatePart(0,0,0);
290         r.SetRow(3, 0,0,0,1);
291         Set3x3PartRotateEulerZXZ(r, z2, x, z);
292         assume(r.Equals(float4x4::RotateZ(z2) * float4x4::RotateX(x) * float4x4::RotateZ(z)));
293         return r;
294 }
295
296 float4x4 float4x4::FromEulerZYZ(float z2, float y, float z)
297 {
298         float4x4 r;
299         r.SetTranslatePart(0,0,0);
300         r.SetRow(3, 0,0,0,1);
301         Set3x3PartRotateEulerZYZ(r, z2, y, z);
302         assume(r.Equals(float4x4::RotateZ(z2) * float4x4::RotateY(y) * float4x4::RotateZ(z)));
303         return r;
304 }
305
306 float4x4 float4x4::FromEulerXYZ(float x, float y, float z)
307 {
308         float4x4 r;
309         r.SetTranslatePart(0,0,0);
310         r.SetRow(3, 0,0,0,1);
311         Set3x3PartRotateEulerXYZ(r, x, y, z);
312         assume(r.Equals(float4x4::RotateX(x) * float4x4::RotateY(y) * float4x4::RotateZ(z)));
313         return r;
314 }
315
316 float4x4 float4x4::FromEulerXZY(float x, float z, float y)
317 {
318         float4x4 r;
319         r.SetTranslatePart(0,0,0);
320         r.SetRow(3, 0,0,0,1);
321         Set3x3PartRotateEulerXZY(r, x, z, y);
322         assume(r.Equals(float4x4::RotateX(x) * float4x4::RotateZ(z) * float4x4::RotateY(y)));
323         return r;
324 }
325
326 float4x4 float4x4::FromEulerYXZ(float y, float x, float z)
327 {
328         float4x4 r;
329         r.SetTranslatePart(0,0,0);
330         r.SetRow(3, 0,0,0,1);
331         Set3x3PartRotateEulerYXZ(r, y, x, z);
332         assume(r.Equals(float4x4::RotateY(y) * float4x4::RotateX(x) * float4x4::RotateZ(z)));
333         return r;
334 }
335
336 float4x4 float4x4::FromEulerYZX(float y, float z, float x)
337 {
338         float4x4 r;
339         r.SetTranslatePart(0,0,0);
340         r.SetRow(3, 0,0,0,1);
341         Set3x3PartRotateEulerYZX(r, y, z, x);
342         assume(r.Equals(float4x4::RotateY(y) * float4x4::RotateZ(z) * float4x4::RotateX(x)));
343         return r;
344 }
345
346 float4x4 float4x4::FromEulerZXY(float z, float x, float y)
347 {
348         float4x4 r;
349         r.SetTranslatePart(0,0,0);
350         r.SetRow(3, 0,0,0,1);
351         Set3x3PartRotateEulerZXY(r, z, x, y);
352         assume(r.Equals(float4x4::RotateZ(z) * float4x4::RotateX(x) * float4x4::RotateY(y)));
353         return r;
354 }
355
356 float4x4 float4x4::FromEulerZYX(float z, float y, float x)
357 {
358         float4x4 r;
359         r.SetTranslatePart(0,0,0);
360         r.SetRow(3, 0,0,0,1);
361         Set3x3PartRotateEulerZYX(r, z, y, x);
362         assume(r.Equals(float4x4::RotateZ(z) * float4x4::RotateY(y) * float4x4::RotateX(x)));
363         return r;
364 }
365
366 ScaleOp float4x4::Scale(float sx, float sy, float sz)
367 {
368         return ScaleOp(sx, sy, sz);
369 }
370
371 ScaleOp float4x4::Scale(const float3 &scale)
372 {
373         return ScaleOp(scale);
374 }
375
376 float4x4 float4x4::Scale(const float3 &scale, const float3 &scaleCenter)
377 {
378         return Translate(scaleCenter) * Scale(scale) * Translate(-scaleCenter);
379 }
380
381 float4x4 float4x4::ScaleAlongAxis(const float3 &axis, float scalingFactor)
382 {
383         return Scale(axis * scalingFactor);
384 }
385
386 float4x4 float4x4::ScaleAlongAxis(const float3 &axis, float scalingFactor, const float3 &scaleCenter)
387 {
388         return Translate(scaleCenter) * Scale(axis * scalingFactor) * Translate(-scaleCenter);
389 }
390
391 ScaleOp float4x4::UniformScale(float uniformScale)
392 {
393         return ScaleOp(uniformScale, uniformScale, uniformScale);
394 }
395
396 float4x4 float4x4::UniformScale(float uniformScale, const float3 &scaleCenter)
397 {
398         return float4x4::Translate(scaleCenter) * float4x4::UniformScale(uniformScale) * float4x4::Translate(-scaleCenter);
399 }
400
401 float3 float4x4::GetScale() const
402 {
403         return float3(Col3(0).Length(), Col3(1).Length(), Col3(2).Length());
404 }
405
406 float4x4 float4x4::ShearX(float yFactor, float zFactor)
407 {
408         return float4x4(1.f, yFactor, zFactor, 0.f,
409                                         0.f, 1.f, 0.f, 0.f,
410                                         0.f, 0.f, 1.f, 0.f,
411                                         0.f, 0.f, 0.f, 1.f);
412 }
413
414 float4x4 float4x4::ShearY(float xFactor, float zFactor)
415 {
416         return float4x4(1.f, 0.f, 0.f, 0.f,
417                                         xFactor, 1.f, zFactor, 0.f,
418                                         0.f, 0.f, 1.f, 0.f,
419                                         0.f, 0.f, 0.f, 1.f);
420 }
421
422 float4x4 float4x4::ShearZ(float xFactor, float yFactor)
423 {
424         return float4x4(1.f, 0.f, 0.f, 0.f,
425                                         0.f, 1.f, 0.f, 0.f,
426                                         xFactor, yFactor, 1.f, 0.f,
427                                         0.f, 0.f, 0.f, 1.f);
428 }
429
430 float4x4 float4x4::Mirror(const Plane &p)
431 {
432         float4x4 v;
433         SetMatrix3x4AffinePlaneMirror(v, p.normal.x, p.normal.y, p.normal.z, p.d);
434         v[3][0] = 0.f; v[3][1] = 0.f; v[3][2] = 0.f; v[3][3] = 1.f;
435         return v;
436 }
437
438 float4x4 float4x4::D3DOrthoProjLH(float n, float f, float h, float v)
439 {
440         float4x4 p;
441         p[0][0] = 2.f / h; p[0][1] = 0;       p[0][2] = 0;           p[0][3] = 0.f;
442         p[1][0] = 0;       p[1][1] = 2.f / v; p[1][2] = 0;           p[1][3] = 0.f;
443         p[2][0] = 0;       p[2][1] = 0;       p[2][2] = 1.f / (f-n); p[2][3] = n / (n-f);
444         p[3][0] = 0;       p[3][1] = 0;       p[3][2] = 0.f;         p[3][3] = 1.f;
445
446         return p;
447 }
448
449 /** This function generates an orthographic projection matrix that maps from
450         the Direct3D view space to the Direct3D normalized viewport space as follows:
451
452         In Direct3D view space, we assume that the camera is positioned at the origin (0,0,0).
453         The camera looks directly towards the positive Z axis (0,0,1).
454         The -X axis spans to the left of the screen, +X goes to the right.
455         -Y goes to the bottom of the screen, +Y goes to the top.
456
457         After the transformation, we're in the Direct3D normalized viewport space as follows:
458
459         (-1,-1,0) is the bottom-left corner of the viewport at the near plane.
460         (1,1,0) is the top-right corner of the viewport at the near plane.
461         (0,0,0) is the center point at the near plane.
462         Coordinates with z=1 are at the far plane.
463
464         Examples:
465                 (0,0,n) maps to (0,0,0).
466                 (0,0,f) maps to (0,0,1).
467                 (-h/2, -v/2, n) maps to (-1, -1, 0).
468                 (h/2, v/2, f) maps to (1, 1, 1).
469         */
470 float4x4 float4x4::D3DOrthoProjRH(float n, float f, float h, float v)
471 {
472         // D3DOrthoProjLH and D3DOrthoProjRH differ from each other in that the third column is negated.
473         // This corresponds to LH = RH * In, where In is a diagonal matrix with elements [1 1 -1 1].
474
475         float4x4 p;
476         p[0][0] = 2.f / h; p[0][1] = 0;       p[0][2] = 0;           p[0][3] = 0.f;
477         p[1][0] = 0;       p[1][1] = 2.f / v; p[1][2] = 0;           p[1][3] = 0.f;
478         p[2][0] = 0;       p[2][1] = 0;       p[2][2] = 1.f / (n-f); p[2][3] = n / (n-f);
479         p[3][0] = 0;       p[3][1] = 0;       p[3][2] = 0.f;         p[3][3] = 1.f;
480
481         return p;
482 }
483
484 float4x4 float4x4::D3DPerspProjLH(float n, float f, float h, float v)
485 {
486         float4x4 p;
487         p[0][0] = 2.f * n / h; p[0][1] = 0;           p[0][2] = 0;           p[0][3] = 0.f;
488         p[1][0] = 0;           p[1][1] = 2.f * n / v; p[1][2] = 0;           p[1][3] = 0.f;
489         p[2][0] = 0;           p[2][1] = 0;           p[2][2] = f / (f-n);   p[2][3] = n * f / (n-f);
490         p[3][0] = 0;           p[3][1] = 0;           p[3][2] = 1.f;         p[3][3] = 0.f;
491
492         return p;
493 }
494
495 float4x4 float4x4::D3DPerspProjRH(float n, float f, float h, float v)
496 {
497         // D3DPerspProjLH and D3DPerspProjRH differ from each other in that the third column is negated.
498         // This corresponds to LH = RH * In, where In is a diagonal matrix with elements [1 1 -1 1].
499
500         // In Direct3D, the post-perspective unit cube ranges in [-1, 1] in X and Y directions,
501         // and in [0, 1] for the Z direction. See http://msdn.microsoft.com/en-us/library/windows/desktop/bb147302(v=vs.85).aspx
502         float4x4 p;
503         p[0][0] = 2.f * n / h; p[0][1] = 0;           p[0][2] = 0;           p[0][3] = 0.f;
504         p[1][0] = 0;           p[1][1] = 2.f * n / v; p[1][2] = 0;           p[1][3] = 0.f;
505         p[2][0] = 0;           p[2][1] = 0;           p[2][2] = f / (n-f);   p[2][3] = n * f / (n-f);
506         p[3][0] = 0;           p[3][1] = 0;           p[3][2] = -1.f;        p[3][3] = 0.f;
507
508         return p;
509 }
510
511 float4x4 float4x4::OpenGLOrthoProjLH(float n, float f, float h, float v)
512 {
513         /// Same as OpenGLOrthoProjRH, except that the camera looks towards +Z in view space, instead of -Z.
514         float4x4 p;
515         p[0][0] = 2.f / h; p[0][1] = 0;       p[0][2] = 0;           p[0][3] = 0.f;
516         p[1][0] = 0;       p[1][1] = 2.f / v; p[1][2] = 0;           p[1][3] = 0.f;
517         p[2][0] = 0;       p[2][1] = 0;       p[2][2] = 2.f / (f-n); p[2][3] = (f+n) / (n-f);
518         p[3][0] = 0;       p[3][1] = 0;       p[3][2] = 0;           p[3][3] = 1.f;
519
520         return p;
521 }
522
523 float4x4 float4x4::OpenGLOrthoProjRH(float n, float f, float h, float v)
524 {
525         float4x4 p;
526         p[0][0] = 2.f / h; p[0][1] = 0;       p[0][2] = 0;           p[0][3] = 0.f;
527         p[1][0] = 0;       p[1][1] = 2.f / v; p[1][2] = 0;           p[1][3] = 0.f;
528         p[2][0] = 0;       p[2][1] = 0;       p[2][2] = 2.f / (n-f); p[2][3] = (f+n) / (n-f);
529         p[3][0] = 0;       p[3][1] = 0;       p[3][2] = 0;           p[3][3] = 1.f;
530
531         return p;
532 }
533
534 float4x4 float4x4::OpenGLPerspProjLH(float n, float f, float h, float v)
535 {
536         // Same as OpenGLPerspProjRH, except that the camera looks towards +Z in view space, instead of -Z.
537         float4x4 p;
538         p[0][0] = 2.f *n / h;  p[0][1] = 0;           p[0][2] = 0;              p[0][3] = 0.f;
539         p[1][0] = 0;           p[1][1] = 2.f * n / v; p[1][2] = 0;              p[1][3] = 0.f;
540         p[2][0] = 0;           p[2][1] = 0;           p[2][2] = (n+f) / (f-n);  p[2][3] = 2.f*n*f / (n-f);
541         p[3][0] = 0;           p[3][1] = 0;           p[3][2] = 1.f;            p[3][3] = 0.f;
542
543         return p;
544 }
545
546 float4x4 float4x4::OpenGLPerspProjRH(float n, float f, float h, float v)
547 {
548         // In OpenGL, the post-perspective unit cube ranges in [-1, 1] in all X, Y and Z directions.
549         // See http://www.songho.ca/opengl/gl_projectionmatrix.html , unlike in Direct3D, where the
550         // Z coordinate ranges in [0, 1]. This is the only difference between D3DPerspProjRH and OpenGLPerspProjRH.
551         float4x4 p;
552         p[0][0] = 2.f *n / h;  p[0][1] = 0;           p[0][2] = 0;              p[0][3] = 0.f;
553         p[1][0] = 0;           p[1][1] = 2.f * n / v; p[1][2] = 0;              p[1][3] = 0.f;
554         p[2][0] = 0;           p[2][1] = 0;           p[2][2] = (n+f) / (n-f);  p[2][3] = 2.f*n*f / (n-f);
555         p[3][0] = 0;           p[3][1] = 0;           p[3][2] = -1.f;           p[3][3] = 0.f;
556
557         return p;
558 }
559
560 float4x4 float4x4::OrthographicProjection(const Plane &p)
561 {
562         float4x4 v;
563         SetMatrix3x4AffinePlaneProject(v, p.normal.x, p.normal.y, p.normal.z, p.d);
564         return v;
565 }
566
567 float4x4 float4x4::OrthographicProjectionYZ()
568 {
569         float4x4 v = identity;
570         v[0][0] = 0.f;
571         return v;
572 }
573
574 float4x4 float4x4::OrthographicProjectionXZ()
575 {
576         float4x4 v = identity;
577         v[1][1] = 0.f;
578         return v;
579 }
580
581 float4x4 float4x4::OrthographicProjectionXY()
582 {
583         float4x4 v = identity;
584         v[2][2] = 0.f;
585         return v;
586 }
587
588 float4x4 float4x4::ComplementaryProjection() const
589 {
590         assume(IsIdempotent());
591
592         return float4x4::identity - *this;
593 }
594
595 MatrixProxy<float4x4::Cols> &float4x4::operator[](int row)
596 {
597         assume(row >= 0);
598         assume(row < Rows);
599 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
600         if (row < 0 || row >= Rows)
601                 row = 0; // Benign failure, just give the first row.
602 #endif
603         return *(reinterpret_cast<MatrixProxy<Cols>*>(v[row]));
604 }
605
606 const MatrixProxy<float4x4::Cols> &float4x4::operator[](int row) const
607 {
608         assume(row >= 0);
609         assume(row < Rows);
610 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
611         if (row < 0 || row >= Rows)
612                 row = 0; // Benign failure, just give the first row.
613 #endif
614         return *(reinterpret_cast<const MatrixProxy<Cols>*>(v[row]));
615 }
616
617 float &float4x4::At(int row, int col)
618 {
619         assume(row >= 0);
620         assume(row < Rows);
621         assume(col >= 0);
622         assume(col < Cols);
623 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
624         if (row < 0 || row >= Rows || col < 0 || col >= Cols)
625                 return v[0][0]; // Benign failure, return the first element.
626 #endif
627         return v[row][col];
628 }
629
630 CONST_WIN32 float float4x4::At(int row, int col) const
631 {
632         assume(row >= 0);
633         assume(row < Rows);
634         assume(col >= 0);
635         assume(col < Cols);
636 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
637         if (row < 0 || row >= Rows || col < 0 || col >= Cols)
638                 return FLOAT_NAN;
639 #endif
640         return v[row][col];
641 }
642
643 float4 &float4x4::Row(int row)
644 {
645         assume(row >= 0);
646         assume(row < Rows);
647
648 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
649         if (row < 0 || row >= Rows)
650                 row = 0; // Benign failure, just give the first row.
651 #endif
652
653         return reinterpret_cast<float4 &>(v[row]);
654 }
655
656 const float4 &float4x4::Row(int row) const
657 {
658         assume(row >= 0);
659         assume(row < Rows);
660
661 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
662         if (row < 0 || row >= Rows)
663                 row = 0; // Benign failure, just give the first row.
664 #endif
665         return reinterpret_cast<const float4 &>(v[row]);
666 }
667
668 float3 &float4x4::Row3(int row)
669 {
670         assume(row >= 0);
671         assume(row < Rows);
672
673 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
674         if (row < 0 || row >= Rows)
675                 row = 0; // Benign failure, just give the first row.
676 #endif
677         return reinterpret_cast<float3 &>(v[row]);
678 }
679
680 const float3 &float4x4::Row3(int row) const
681 {
682         assume(row >= 0);
683         assume(row < Rows);
684
685 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
686         if (row < 0 || row >= Rows)
687                 row = 0; // Benign failure, just give the first row.
688 #endif
689         return reinterpret_cast<const float3 &>(v[row]);
690 }
691
692 CONST_WIN32 float4 float4x4::Col(int col) const
693 {
694         assume(col >= 0);
695         assume(col < Cols);
696 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
697         if (col < 0 || col >= Cols)
698                 return float4::nan;
699 #endif
700         return float4(v[0][col], v[1][col], v[2][col], v[3][col]);
701 }
702
703 CONST_WIN32 float3 float4x4::Col3(int col) const
704 {
705         assume(col >= 0);
706         assume(col < Cols);
707 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
708         if (col < 0 || col >= Cols)
709                 return float3::nan;
710 #endif
711         return float3(v[0][col], v[1][col], v[2][col]);
712 }
713
714 CONST_WIN32 float4 float4x4::Diagonal() const
715 {
716         return float4(v[0][0], v[1][1], v[2][2], v[3][3]);
717 }
718
719 CONST_WIN32 float3 float4x4::Diagonal3() const
720 {
721         return float3(v[0][0], v[1][1], v[2][2]);
722 }
723
724 void float4x4::ScaleRow3(int row, float scalar)
725 {
726         assume(MATH_NS::IsFinite(scalar));
727         Row3(row) *= scalar;
728 }
729
730 void float4x4::ScaleRow(int row, float scalar)
731 {
732         assume(MATH_NS::IsFinite(scalar));
733         Row(row) *= scalar;
734 }
735
736 void float4x4::ScaleCol3(int col, float scalar)
737 {
738         assume(MATH_NS::IsFinite(scalar));
739         assume(col >= 0);
740         assume(col < Cols);
741 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
742         if (col < 0 || col >= Cols)
743                 return; // Benign failure
744 #endif
745         v[0][col] *= scalar;
746         v[1][col] *= scalar;
747         v[2][col] *= scalar;
748 }
749
750 void float4x4::ScaleCol(int col, float scalar)
751 {
752         assume(MATH_NS::IsFinite(scalar));
753         assume(col >= 0);
754         assume(col < Cols);
755 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
756         if (col < 0 || col >= Cols)
757                 return; // Benign failure
758 #endif
759         v[0][col] *= scalar;
760         v[1][col] *= scalar;
761         v[2][col] *= scalar;
762         v[3][col] *= scalar;
763 }
764
765 CONST_WIN32 float3x3 float4x4::Float3x3Part() const
766 {
767         return float3x3(v[0][0], v[0][1], v[0][2],
768                                         v[1][0], v[1][1], v[1][2],
769                                         v[2][0], v[2][1], v[2][2]);
770 }
771
772 float3x4 &float4x4::Float3x4Part()
773 {
774         return reinterpret_cast<float3x4 &>(*this);
775 }
776
777 const float3x4 &float4x4::Float3x4Part() const
778 {
779         return reinterpret_cast<const float3x4 &>(*this);
780 }
781
782 CONST_WIN32 float3 float4x4::TranslatePart() const
783 {
784         return Col3(3);
785 }
786
787 CONST_WIN32 float3x3 float4x4::RotatePart() const
788 {
789         return Float3x3Part();
790 }
791
792 float3 float4x4::WorldX() const
793 {
794         return Col3(0);
795 }
796
797 float3 float4x4::WorldY() const
798 {
799         return Col3(1);
800 }
801
802 float3 float4x4::WorldZ() const
803 {
804         return Col3(2);
805 }
806
807 void float4x4::SetRow3(int row, const float3 &rowVector)
808 {
809         SetRow3(row, rowVector.x, rowVector.y, rowVector.z);
810 }
811
812 void float4x4::SetRow3(int row, const float *data)
813 {
814         assume(data);
815 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
816         if (!data)
817                 return;
818 #endif
819         SetRow3(row, data[0], data[1], data[2]);
820 }
821
822 void float4x4::SetRow3(int row, float m_r0, float m_r1, float m_r2)
823 {
824         assume(row >= 0);
825         assume(row < Rows);
826         assume(MATH_NS::IsFinite(m_r0));
827         assume(MATH_NS::IsFinite(m_r1));
828         assume(MATH_NS::IsFinite(m_r2));
829 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
830         if (row < 0 || row >= Rows)
831                 return; // Benign failure
832 #endif
833         v[row][0] = m_r0;
834         v[row][1] = m_r1;
835         v[row][2] = m_r2;
836 }
837
838 void float4x4::SetRow(int row, const float3 &rowVector, float m_r3)
839 {
840         SetRow(row, rowVector.x, rowVector.y, rowVector.z, m_r3);
841 }
842
843 void float4x4::SetRow(int row, const float4 &rowVector)
844 {
845         SetRow(row, rowVector.x, rowVector.y, rowVector.z, rowVector.w);
846 }
847
848 void float4x4::SetRow(int row, const float *data)
849 {
850         assume(data);
851 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
852         if (!data)
853                 return;
854 #endif
855         SetRow(row, data[0], data[1], data[2], data[3]);
856 }
857
858 void float4x4::SetRow(int row, float m_r0, float m_r1, float m_r2, float m_r3)
859 {
860         assume(row >= 0);
861         assume(row < Rows);
862         assume(MATH_NS::IsFinite(m_r0));
863         assume(MATH_NS::IsFinite(m_r1));
864         assume(MATH_NS::IsFinite(m_r2));
865         assume(MATH_NS::IsFinite(m_r3));
866 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
867         if (row < 0 || row >= Rows)
868                 return; // Benign failure
869 #endif
870
871 // Require VS2012 for the following line - VS2010 fails at internal compiler error, see
872 // http://clb.demon.fi:8113/builders/vs2010-MathGeoLib-32bit-SSE4.1/builds/379/steps/Compile%20MathGeoLib-32bit-Release/logs/stdio
873 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE) && _MSC_VER >= 1700
874         this->row[row] = set_ps(m_r3, m_r2, m_r1, m_r0);
875 #else
876         v[row][0] = m_r0;
877         v[row][1] = m_r1;
878         v[row][2] = m_r2;
879         v[row][3] = m_r3;
880 #endif
881 }
882
883 void float4x4::SetCol3(int column, const float3 &columnVector)
884 {
885         SetCol3(column, columnVector.x, columnVector.y, columnVector.z);
886 }
887
888 void float4x4::SetCol3(int column, const float *data)
889 {
890         assume(data);
891 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
892         if (!data)
893                 return;
894 #endif
895         SetCol3(column, data[0], data[1], data[2]);
896 }
897
898 void float4x4::SetCol3(int column, float m_0c, float m_1c, float m_2c)
899 {
900         assume(column >= 0);
901         assume(column < Cols);
902         assume(MATH_NS::IsFinite(m_0c));
903         assume(MATH_NS::IsFinite(m_1c));
904         assume(MATH_NS::IsFinite(m_2c));
905 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
906         if (column < 0 || column >= Cols)
907                 return; // Benign failure
908 #endif
909         v[0][column] = m_0c;
910         v[1][column] = m_1c;
911         v[2][column] = m_2c;
912 }
913
914 void float4x4::SetCol(int column, const float3 &columnVector, float m_3c)
915 {
916         SetCol(column, columnVector.x, columnVector.y, columnVector.z, m_3c);
917 }
918
919 void float4x4::SetCol(int column, const float4 &columnVector)
920 {
921         SetCol(column, columnVector.x, columnVector.y, columnVector.z, columnVector.w);
922 }
923
924 void float4x4::SetCol(int column, const float *data)
925 {
926         assume(data);
927 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
928         if (!data)
929                 return;
930 #endif
931         SetCol(column, data[0], data[1], data[2], data[3]);
932 }
933
934 void float4x4::SetCol(int column, float m_0c, float m_1c, float m_2c, float m_3c)
935 {
936         assume(column >= 0);
937         assume(column < Cols);
938         assume(MATH_NS::IsFinite(m_0c));
939         assume(MATH_NS::IsFinite(m_1c));
940         assume(MATH_NS::IsFinite(m_2c));
941         assume(MATH_NS::IsFinite(m_3c));
942 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
943         if (column < 0 || column >= Cols)
944                 return; // Benign failure
945 #endif
946         v[0][column] = m_0c;
947         v[1][column] = m_1c;
948         v[2][column] = m_2c;
949         v[3][column] = m_3c;
950 }
951
952 void float4x4::Set(float _00, float _01, float _02, float _03,
953                                    float _10, float _11, float _12, float _13,
954                                    float _20, float _21, float _22, float _23,
955                                    float _30, float _31, float _32, float _33)
956 {
957 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
958         mat4x4_set(row, _00, _01, _02, _03,
959                         _10, _11, _12, _13,
960                         _20, _21, _22, _23,
961                         _30, _31, _32, _33);
962 #else
963         v[0][0] = _00; v[0][1] = _01; v[0][2] = _02; v[0][3] = _03;
964         v[1][0] = _10; v[1][1] = _11; v[1][2] = _12; v[1][3] = _13;
965         v[2][0] = _20; v[2][1] = _21; v[2][2] = _22; v[2][3] = _23;
966         v[3][0] = _30; v[3][1] = _31; v[3][2] = _32; v[3][3] = _33;
967 #endif
968 }
969
970 void float4x4::Set(const float4x4 &rhs)
971 {
972 #ifdef MATH_AVX
973         row2[0] = rhs.row2[0];
974         row2[1] = rhs.row2[1];
975 #elif defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
976         row[0] = rhs.row[0];
977         row[1] = rhs.row[1];
978         row[2] = rhs.row[2];
979         row[3] = rhs.row[3];
980 #else
981         Set(rhs.ptr());
982 #endif
983 }
984
985 void float4x4::Set(const float *v)
986 {
987         assume(v);
988 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
989         if (!v)
990                 return;
991 #endif
992         Set( v[0],  v[1],  v[2],  v[3],
993              v[4],  v[5],  v[6],  v[7],
994              v[8],  v[9], v[10], v[11],
995             v[12], v[13], v[14], v[15]);
996 }
997
998 void float4x4::Set(int row, int col, float value)
999 {
1000         assume(row >= 0);
1001         assume(row < Rows);
1002         assume(col >= 0);
1003         assume(col < Cols);
1004 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1005         if (row < 0 || row >= Rows || col < 0 || col >= Cols)
1006                 return; // Benign failure
1007 #endif
1008         v[row][col] = value;
1009 }
1010
1011 void float4x4::SetIdentity()
1012 {
1013         Set(1,0,0,0,
1014                 0,1,0,0,
1015                 0,0,1,0,
1016                 0,0,0,1);
1017 }
1018
1019 void float4x4::Set3x3Part(const float3x3 &r)
1020 {
1021         assume(r.IsFinite());
1022         v[0][0] = r[0][0]; v[0][1] = r[0][1]; v[0][2] = r[0][2];
1023         v[1][0] = r[1][0]; v[1][1] = r[1][1]; v[1][2] = r[1][2];
1024         v[2][0] = r[2][0]; v[2][1] = r[2][1]; v[2][2] = r[2][2];
1025 }
1026
1027 void float4x4::Set3x4Part(const float3x4 &r)
1028 {
1029         assume(r.IsFinite());
1030
1031 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1032         row[0] = r.row[0];
1033         row[1] = r.row[1];
1034         row[2] = r.row[2];
1035 #else
1036         v[0][0] = r[0][0]; v[0][1] = r[0][1]; v[0][2] = r[0][2]; v[0][3] = r[0][3];
1037         v[1][0] = r[1][0]; v[1][1] = r[1][1]; v[1][2] = r[1][2]; v[1][3] = r[1][3];
1038         v[2][0] = r[2][0]; v[2][1] = r[2][1]; v[2][2] = r[2][2]; v[2][3] = r[2][3];
1039 #endif
1040 }
1041
1042 void float4x4::SwapColumns(int col1, int col2)
1043 {
1044         assume(col1 >= 0);
1045         assume(col1 < Cols);
1046         assume(col2 >= 0);
1047         assume(col2 < Cols);
1048 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1049         if (col1 < 0 || col1 >= Cols || col2 < 0 || col2 >= Cols)
1050                 return; // Benign failure
1051 #endif
1052         Swap(v[0][col1], v[0][col2]);
1053         Swap(v[1][col1], v[1][col2]);
1054         Swap(v[2][col1], v[2][col2]);
1055         Swap(v[3][col1], v[3][col2]);
1056 }
1057
1058 void float4x4::SwapColumns3(int col1, int col2)
1059 {
1060         assume(col1 >= 0);
1061         assume(col1 < Cols);
1062         assume(col2 >= 0);
1063         assume(col2 < Cols);
1064 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1065         if (col1 < 0 || col1 >= Cols || col2 < 0 || col2 >= Cols)
1066                 return; // Benign failure
1067 #endif
1068         Swap(v[0][col1], v[0][col2]);
1069         Swap(v[1][col1], v[1][col2]);
1070         Swap(v[2][col1], v[2][col2]);
1071 }
1072
1073 void float4x4::SwapRows(int row1, int row2)
1074 {
1075         assume(row1 >= 0);
1076         assume(row1 < Rows);
1077         assume(row2 >= 0);
1078         assume(row2 < Rows);
1079 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1080         if (row1 < 0 || row1 >= Rows || row2 < 0 || row2 >= Rows)
1081                 return; // Benign failure
1082 #endif
1083
1084 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1085         Swap(row[row1], row[row2]);
1086 #else
1087         Swap(v[row1][0], v[row2][0]);
1088         Swap(v[row1][1], v[row2][1]);
1089         Swap(v[row1][2], v[row2][2]);
1090         Swap(v[row1][3], v[row2][3]);
1091 #endif
1092 }
1093
1094 void float4x4::SwapRows3(int row1, int row2)
1095 {
1096         assume(row1 >= 0);
1097         assume(row1 < Rows);
1098         assume(row2 >= 0);
1099         assume(row2 < Rows);
1100 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1101         if (row1 < 0 || row1 >= Rows || row2 < 0 || row2 >= Rows)
1102                 return; // Benign failure
1103 #endif
1104         Swap(v[row1][0], v[row2][0]);
1105         Swap(v[row1][1], v[row2][1]);
1106         Swap(v[row1][2], v[row2][2]);
1107 }
1108
1109 void float4x4::SetTranslatePart(float tx, float ty, float tz)
1110 {
1111         v[0][3] = tx;
1112         v[1][3] = ty;
1113         v[2][3] = tz;
1114 }
1115
1116 void float4x4::SetTranslatePart(const float3 &offset)
1117 {
1118         v[0][3] = offset.x;
1119         v[1][3] = offset.y;
1120         v[2][3] = offset.z;
1121 }
1122
1123 void float4x4::SetTranslatePart(const float4 &offset)
1124 {
1125         v[0][3] = offset.x;
1126         v[1][3] = offset.y;
1127         v[2][3] = offset.z;
1128         assume(EqualAbs(offset.w, 1.f) || EqualAbs(offset.w, 0.f));
1129 }
1130
1131 void float4x4::SetRotatePartX(float angle)
1132 {
1133         Set3x3PartRotateX(*this, angle);
1134 }
1135
1136 void float4x4::SetRotatePartY(float angle)
1137 {
1138         Set3x3PartRotateY(*this, angle);
1139 }
1140
1141 void float4x4::SetRotatePartZ(float angle)
1142 {
1143         Set3x3PartRotateZ(*this, angle);
1144 }
1145
1146 void float4x4::SetRotatePart(const float3 &a, float angle)
1147 {
1148         SetRotationAxis3x3(*this, a, angle);
1149 }
1150
1151 void float4x4::SetRotatePart(const Quat &q)
1152 {
1153         SetMatrixRotatePart(*this, q);
1154 }
1155
1156 float4x4 float4x4::LookAt(const float3 &localForward, const float3 &targetDirection, const float3 &localUp, const float3 &worldUp)
1157 {
1158         float4x4 m;
1159         m.SetRotatePart(float3x3::LookAt(localForward, targetDirection, localUp, worldUp));
1160         m.SetTranslatePart(0,0,0);
1161         m.SetRow(3, 0,0,0,1);
1162         return m;
1163 }
1164
1165 float4x4 float4x4::LookAt(const float3 &eyePos, const float3 &targetPos, const float3 &localForward,
1166                           const float3 &localUp, const float3 &worldUp)
1167 {
1168         float4x4 m;
1169         m.SetRotatePart(float3x3::LookAt(localForward, (targetPos-eyePos).Normalized(), localUp, worldUp));
1170         m.SetTranslatePart(eyePos);
1171         m.SetRow(3, 0,0,0,1);
1172         return m;
1173 }
1174
1175 float4x4 &float4x4::operator =(const float3x3 &rhs)
1176 {
1177         SetRotatePart(rhs);
1178         SetTranslatePart(0,0,0);
1179         SetRow(3, 0,0,0,1);
1180         return *this;
1181 }
1182
1183 float4x4 &float4x4::operator =(const float3x4 &rhs)
1184 {
1185 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1186         row[0] = rhs.row[0];
1187         row[1] = rhs.row[1];
1188         row[2] = rhs.row[2];
1189         row[3] = set_ps(1.f, 0.f, 0.f, 0.f);
1190 #else
1191         Float3x4Part() = rhs;
1192         v[3][0] = 0.f;
1193         v[3][1] = 0.f;
1194         v[3][2] = 0.f;
1195         v[3][3] = 1.f;
1196 #endif
1197         return *this;
1198 }
1199
1200 float4x4 &float4x4::operator =(const float4x4 &rhs)
1201 {
1202         // We deliberately don't want to assume rhs is finite, it is ok
1203         // to copy around uninitialized matrices.
1204         // But note that when assigning through a conversion above (float3x3 -> float4x4 or float3x4 -> float4x4),
1205         // we do assume the input matrix is finite.
1206 //      assume(rhs.IsFinite());
1207
1208 /* // AVX path determined to be one clock cycle slower than SSE path: (6 clock cycles on AVX, 5 on SSE)
1209 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_AVX)
1210         assert(IS32ALIGNED(this));
1211         assert(IS32ALIGNED(&rhs));
1212         row2[0] = rhs.row2[0];
1213         row2[1] = rhs.row2[1];
1214 #elif defined(MATH_SSE) */
1215
1216 #if defined(MATH_AUTOMATIC_SSE)
1217
1218 #if !defined(ANDROID) // Android NEON doesn't currently use aligned loads.
1219         assert(IS16ALIGNED(this));
1220         assert(IS16ALIGNED(&rhs));
1221 #endif
1222         row[0] = rhs.row[0];
1223         row[1] = rhs.row[1];
1224         row[2] = rhs.row[2];
1225         row[3] = rhs.row[3];
1226 #else
1227         v[0][0] = rhs.v[0][0];
1228         v[0][1] = rhs.v[0][1];
1229         v[0][2] = rhs.v[0][2];
1230         v[0][3] = rhs.v[0][3];
1231
1232         v[1][0] = rhs.v[1][0];
1233         v[1][1] = rhs.v[1][1];
1234         v[1][2] = rhs.v[1][2];
1235         v[1][3] = rhs.v[1][3];
1236
1237         v[2][0] = rhs.v[2][0];
1238         v[2][1] = rhs.v[2][1];
1239         v[2][2] = rhs.v[2][2];
1240         v[2][3] = rhs.v[2][3];
1241
1242         v[3][0] = rhs.v[3][0];
1243         v[3][1] = rhs.v[3][1];
1244         v[3][2] = rhs.v[3][2];
1245         v[3][3] = rhs.v[3][3];
1246 #endif
1247         return *this;
1248 }
1249
1250 float4x4 &float4x4::operator =(const Quat &rhs)
1251 {
1252         *this = rhs.ToFloat4x4();
1253         return *this;
1254 }
1255
1256 float4x4 &float4x4::operator =(const TranslateOp &rhs)
1257 {
1258         Set(1.f,   0,   0, rhs.offset.x,
1259               0, 1.f,   0, rhs.offset.y,
1260               0,   0, 1.f, rhs.offset.z,
1261               0,   0,   0,   1.f);
1262
1263         return *this;
1264 }
1265
1266 float float4x4::Determinant3() const
1267 {
1268 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1269         return mat3x4_determinant(row);
1270 #else
1271         assume(Float3x3Part().IsFinite());
1272         const float a = v[0][0];
1273         const float b = v[0][1];
1274         const float c = v[0][2];
1275         const float d = v[1][0];
1276         const float e = v[1][1];
1277         const float f = v[1][2];
1278         const float g = v[2][0];
1279         const float h = v[2][1];
1280         const float i = v[2][2];
1281
1282         return a*e*i + b*f*g + c*d*h - a*f*h - b*d*i - c*e*g;
1283 #endif
1284 }
1285
1286 float float4x4::Determinant4() const
1287 {
1288 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1289         return mat4x4_determinant(row);
1290 #else
1291         assume(IsFinite());
1292         return v[0][0] * Minor(0,0) - v[0][1] * Minor(0,1) + v[0][2] * Minor(0,2) - v[0][3] * Minor(0,3);
1293 #endif
1294 }
1295
1296 #define SKIPNUM(val, skip) (val >= skip ? (val+1) : val)
1297
1298 float3x3 float4x4::SubMatrix(int i, int j) const
1299 {
1300         int r0 = SKIPNUM(0, i);
1301         int r1 = SKIPNUM(1, i);
1302         int r2 = SKIPNUM(2, i);
1303         int c0 = SKIPNUM(0, j);
1304         int c1 = SKIPNUM(1, j);
1305         int c2 = SKIPNUM(2, j);
1306
1307         return float3x3(v[r0][c0], v[r0][c1], v[r0][c2],
1308                                         v[r1][c0], v[r1][c1], v[r1][c2],
1309                                         v[r2][c0], v[r2][c1], v[r2][c2]);
1310 }
1311
1312 float float4x4::Minor(int i, int j) const
1313 {
1314         int r0 = SKIPNUM(0, i);
1315         int r1 = SKIPNUM(1, i);
1316         int r2 = SKIPNUM(2, i);
1317         int c0 = SKIPNUM(0, j);
1318         int c1 = SKIPNUM(1, j);
1319         int c2 = SKIPNUM(2, j);
1320
1321         float a = v[r0][c0];
1322         float b = v[r0][c1];
1323         float c = v[r0][c2];
1324         float d = v[r1][c0];
1325         float e = v[r1][c1];
1326         float f = v[r1][c2];
1327         float g = v[r2][c0];
1328         float h = v[r2][c1];
1329         float k = v[r2][c2];
1330
1331         return a*e*k + b*f*g + c*d*h - a*f*h - b*d*k - c*e*g;
1332 }
1333
1334 float4x4 float4x4::Adjugate() const
1335 {
1336         float4x4 a;
1337         for(int y = 0; y < Rows; ++y)
1338                 for(int x = 0; x < Cols; ++x)
1339                         a[y][x] = (((x+y) & 1) != 0) ? -Minor(y, x) : Minor(y, x);
1340
1341         return a;
1342 }
1343
1344 bool float4x4::CholeskyDecompose(float4x4 &outL) const
1345 {
1346         return CholeskyDecomposeMatrix(*this, outL);
1347 }
1348
1349 bool float4x4::LUDecompose(float4x4 &outLower, float4x4 &outUpper) const
1350 {
1351         return LUDecomposeMatrix(*this, outLower, outUpper);
1352 }
1353
1354 bool float4x4::Inverse(float epsilon)
1355 {
1356 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1357         MARK_UNUSED(epsilon);
1358         float det = mat4x4_inverse(row, row);
1359         return MATH_NS::Abs(det) > 1e-5f;
1360 #else
1361         return InverseMatrix(*this, epsilon);
1362 #endif
1363 }
1364
1365 float4x4 float4x4::Inverted() const
1366 {
1367 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1368         float4x4 copy;
1369         mat4x4_inverse(row, copy.row);
1370 #else
1371         float4x4 copy = *this;
1372         copy.Inverse();
1373 #endif
1374         return copy;
1375 }
1376
1377 bool float4x4::InverseColOrthogonal()
1378 {
1379         ///\todo SSE
1380         assume(!ContainsProjection());
1381         return Float3x4Part().InverseColOrthogonal();
1382 }
1383
1384 bool float4x4::InverseOrthogonalUniformScale()
1385 {
1386         ///\todo SSE
1387         assume(!ContainsProjection());
1388         return Float3x4Part().InverseOrthogonalUniformScale();
1389 }
1390
1391 void float4x4::InverseOrthonormal()
1392 {
1393         assume(!ContainsProjection());
1394 #ifdef MATH_SSE
1395         mat3x4_inverse_orthonormal(row, row);
1396 #else
1397         return Float3x4Part().InverseOrthonormal();
1398 #endif
1399 }
1400
1401 void float4x4::Transpose()
1402 {
1403 #if defined(MATH_AUTOMATIC_SSE) && !defined(ANDROID) ///\bug Android GCC 4.6.6 gives internal compiler error!
1404         mat4x4_transpose(row, row);
1405 #else
1406         Swap(v[0][1], v[1][0]);
1407         Swap(v[0][2], v[2][0]);
1408         Swap(v[0][3], v[3][0]);
1409         Swap(v[1][2], v[2][1]);
1410         Swap(v[1][3], v[3][1]);
1411         Swap(v[2][3], v[3][2]);
1412 #endif
1413 }
1414
1415 float4x4 float4x4::Transposed() const
1416 {
1417 #if defined(MATH_AUTOMATIC_SSE) && !defined(ANDROID) ///\bug Android GCC 4.6.6 gives internal compiler error!
1418         float4x4 copy;
1419         mat4x4_transpose(copy.row, row);
1420         return copy;
1421 #else
1422         float4x4 copy;
1423         copy.v[0][0] = v[0][0]; copy.v[0][1] = v[1][0]; copy.v[0][2] = v[2][0]; copy.v[0][3] = v[3][0];
1424         copy.v[1][0] = v[0][1]; copy.v[1][1] = v[1][1]; copy.v[1][2] = v[2][1]; copy.v[1][3] = v[3][1];
1425         copy.v[2][0] = v[0][2]; copy.v[2][1] = v[1][2]; copy.v[2][2] = v[2][2]; copy.v[2][3] = v[3][2];
1426         copy.v[3][0] = v[0][3]; copy.v[3][1] = v[1][3]; copy.v[3][2] = v[2][3]; copy.v[3][3] = v[3][3];
1427         return copy;
1428 #endif
1429 }
1430
1431 bool float4x4::InverseTranspose()
1432 {
1433         bool success = Inverse();
1434         Transpose();
1435         return success;
1436 }
1437
1438 float4x4 float4x4::InverseTransposed() const
1439 {
1440         float4x4 copy = *this;
1441         copy.Transpose();
1442         copy.Inverse();
1443         return copy;
1444 }
1445
1446 float float4x4::Trace() const
1447 {
1448         assume(IsFinite());
1449         return v[0][0] + v[1][1] + v[2][2] + v[3][3];
1450 }
1451
1452 void float4x4::Orthogonalize3(int c0, int c1, int c2)
1453 {
1454         ///\todo SSE
1455         assume(c0 != c1 && c0 != c2 && c1 != c2);
1456         assume(c0 >= 0 && c1 >= 0 && c2 >= 0 && c0 < Cols && c1 < Cols && c2 < Cols);
1457 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1458         if (c0 == c1 || c0 == c2 || c1 == c2)
1459                 return;
1460 #endif
1461         ///@todo Optimize away copies.
1462         float3 v0 = Col3(c0);
1463         float3 v1 = Col3(c1);
1464         float3 v2 = Col3(c2);
1465         float3::Orthogonalize(v0, v1, v2);
1466         SetCol3(c0, v0);
1467         SetCol3(c1, v1);
1468         SetCol3(c2, v2);
1469 }
1470
1471 void float4x4::Orthonormalize3(int c0, int c1, int c2)
1472 {
1473         ///\todo SSE
1474         assume(c0 != c1 && c0 != c2 && c1 != c2);
1475         assume(c0 >= 0 && c1 >= 0 && c2 >= 0 && c0 < Cols && c1 < Cols && c2 < Cols);
1476 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1477         if (c0 == c1 || c0 == c2 || c1 == c2)
1478                 return;
1479 #endif
1480         ///@todo Optimize away copies.
1481         float3 v0 = Col3(c0);
1482         float3 v1 = Col3(c1);
1483         float3 v2 = Col3(c2);
1484         float3::Orthonormalize(v0, v1, v2);
1485         SetCol3(c0, v0);
1486         SetCol3(c1, v1);
1487         SetCol3(c2, v2);
1488 }
1489
1490 void float4x4::RemoveScale()
1491 {
1492         ///\todo SSE
1493         float x = Row3(0).Normalize();
1494         float y = Row3(1).Normalize();
1495         float z = Row3(2).Normalize();
1496         assume(x != 0 && y != 0 && z != 0 && "float4x4::RemoveScale failed!");
1497         MARK_UNUSED(x);
1498         MARK_UNUSED(y);
1499         MARK_UNUSED(z);
1500 }
1501
1502 /// Algorithm from Eric Lengyel's Mathematics for 3D Game Programming & Computer Graphics, 2nd Ed.
1503 void float4x4::Pivot()
1504 {
1505         int row = 0;
1506
1507         for(int col = 0; col < Cols; ++col)
1508         {
1509                 int greatest = row;
1510
1511                 // find the row k with k >= 1 for which Mkj has the largest absolute value.
1512                 for(int i = row; i < Rows; ++i)
1513                         if (MATH_NS::Abs(v[i][col]) > MATH_NS::Abs(v[greatest][col]))
1514                                 greatest = i;
1515
1516                 if (!EqualAbs(v[greatest][col], 0))
1517                 {
1518                         if (row != greatest)
1519                                 SwapRows(row, greatest); // the greatest now in row
1520
1521                         ScaleRow(row, 1.f/v[row][col]);
1522
1523                         for(int r = 0; r < Rows; ++r)
1524                                 if (r != row)
1525                                         SetRow(r, Row(r) - Row(row) * v[r][col]);
1526
1527                         ++row;
1528                 }
1529         }
1530 }
1531
1532 float3 float4x4::TransformPos(const float3 &pointVector) const
1533 {
1534         assume(!this->ContainsProjection()); // This function does not divide by w or output it, so cannot have projection.
1535 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1536         return mat3x4_mul_vec(row, set_ps(1.f, pointVector.z, pointVector.y, pointVector.x));
1537 #else
1538         return TransformPos(pointVector.x, pointVector.y, pointVector.z);
1539 #endif
1540 }
1541
1542 float3 float4x4::TransformPos(float x, float y, float z) const
1543 {
1544         assume(!this->ContainsProjection()); // This function does not divide by w or output it, so cannot have projection.
1545 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1546         return mat3x4_mul_vec(row, set_ps(1.f, z, y, x));
1547 #else
1548         return float3(DOT4POS_xyz(Row(0), x,y,z),
1549                                   DOT4POS_xyz(Row(1), x,y,z),
1550                                   DOT4POS_xyz(Row(2), x,y,z));
1551 #endif
1552 }
1553
1554 float3 float4x4::TransformDir(const float3 &directionVector) const
1555 {
1556         assume(!this->ContainsProjection()); // This function does not divide by w or output it, so cannot have projection.
1557 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1558         return mat3x4_mul_vec(row, set_ps(0.f, directionVector.z, directionVector.y, directionVector.x));
1559 #else
1560         return TransformDir(directionVector.x, directionVector.y, directionVector.z);
1561 #endif
1562 }
1563
1564 float3 float4x4::TransformDir(float x, float y, float z) const
1565 {
1566         assume(!this->ContainsProjection()); // This function does not divide by w or output it, so cannot have projection.
1567 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1568         return mat3x4_mul_vec(row, set_ps(0.f, z, y, x));
1569 #else
1570         return float3(DOT4DIR_xyz(Row(0), x,y,z),
1571                       DOT4DIR_xyz(Row(1), x,y,z),
1572                       DOT4DIR_xyz(Row(2), x,y,z));
1573 #endif
1574 }
1575
1576 float4 float4x4::Transform(const float4 &vector) const
1577 {
1578 #if defined(MATH_AUTOMATIC_SSE) && !defined(ANDROID) ///\bug Android GCC 4.6.6 gives internal compiler error!
1579         return mat4x4_mul_vec4(row, vector.v);
1580 #else
1581         return float4(DOT4(Row(0), vector),
1582                       DOT4(Row(1), vector),
1583                       DOT4(Row(2), vector),
1584                       DOT4(Row(3), vector));
1585 #endif
1586 }
1587
1588 void float4x4::TransformPos(float3 *pointArray, int numPoints) const
1589 {
1590         ///\todo SSE.
1591         assume(pointArray);
1592 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1593         if (!pointArray)
1594                 return;
1595 #endif
1596         for(int i = 0; i < numPoints; ++i)
1597                 pointArray[i] = this->TransformPos(pointArray[i]);
1598 }
1599
1600 void float4x4::TransformPos(float3 *pointArray, int numPoints, int strideBytes) const
1601 {
1602         ///\todo SSE.
1603         assume(pointArray);
1604 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1605         if (!pointArray)
1606                 return;
1607 #endif
1608         u8 *data = reinterpret_cast<u8*>(pointArray);
1609         for(int i = 0; i < numPoints; ++i)
1610         {
1611                 float3 *v = reinterpret_cast<float3*>(data);
1612                 *v = this->TransformPos(*v);
1613                 data += strideBytes;
1614         }
1615 }
1616
1617 void float4x4::TransformDir(float3 *dirArray, int numVectors) const
1618 {
1619         ///\todo SSE.
1620         assume(dirArray);
1621 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1622         if (!dirArray)
1623                 return;
1624 #endif
1625         for(int i = 0; i < numVectors; ++i)
1626                 dirArray[i] = this->TransformDir(dirArray[i]);
1627 }
1628
1629 void float4x4::TransformDir(float3 *dirArray, int numVectors, int strideBytes) const
1630 {
1631         ///\todo SSE.
1632         assume(dirArray);
1633 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1634         if (!dirArray)
1635                 return;
1636 #endif
1637         u8 *data = reinterpret_cast<u8*>(dirArray);
1638         for(int i = 0; i < numVectors; ++i)
1639         {
1640                 float3 *v = reinterpret_cast<float3*>(data);
1641                 *v = this->TransformDir(*v);
1642                 data += strideBytes;
1643         }
1644 }
1645
1646 void float4x4::Transform(float4 *vectorArray, int numVectors) const
1647 {
1648         ///\todo SSE.
1649         assume(vectorArray);
1650 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1651         if (!vectorArray)
1652                 return;
1653 #endif
1654         for(int i = 0; i < numVectors; ++i)
1655                 vectorArray[i] = *this * vectorArray[i];
1656 }
1657
1658 void float4x4::Transform(float4 *vectorArray, int numVectors, int strideBytes) const
1659 {
1660         ///\todo SSE.
1661         assume(vectorArray);
1662 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
1663         if (!vectorArray)
1664                 return;
1665 #endif
1666         u8 *data = reinterpret_cast<u8*>(vectorArray);
1667         for(int i = 0; i < numVectors; ++i)
1668         {
1669                 float4 *v = reinterpret_cast<float4*>(data);
1670                 *v = *this * *v;
1671                 data += strideBytes;
1672         }
1673 }
1674
1675 float4x4 float4x4::operator *(const float3x3 &rhs) const
1676 {
1677         float4x4 r;
1678 #ifdef MATH_AUTOMATIC_SSE
1679         mat4x4_mul_mat3x3_sse(r.row, this->row, rhs.ptr());
1680 #else
1681         const float *c0 = rhs.ptr();
1682         const float *c1 = rhs.ptr() + 1;
1683         const float *c2 = rhs.ptr() + 2;
1684         r[0][0] = DOT3STRIDED(v[0], c0, 3);
1685         r[0][1] = DOT3STRIDED(v[0], c1, 3);
1686         r[0][2] = DOT3STRIDED(v[0], c2, 3);
1687         r[0][3] = v[0][3];
1688
1689         r[1][0] = DOT3STRIDED(v[1], c0, 3);
1690         r[1][1] = DOT3STRIDED(v[1], c1, 3);
1691         r[1][2] = DOT3STRIDED(v[1], c2, 3);
1692         r[1][3] = v[1][3];
1693
1694         r[2][0] = DOT3STRIDED(v[2], c0, 3);
1695         r[2][1] = DOT3STRIDED(v[2], c1, 3);
1696         r[2][2] = DOT3STRIDED(v[2], c2, 3);
1697         r[2][3] = v[2][3];
1698
1699         r[3][0] = DOT3STRIDED(v[3], c0, 3);
1700         r[3][1] = DOT3STRIDED(v[3], c1, 3);
1701         r[3][2] = DOT3STRIDED(v[3], c2, 3);
1702         r[3][3] = v[3][3];
1703 #endif
1704         return r;
1705 }
1706
1707 float4x4 float4x4::operator *(const float3x4 &rhs) const
1708 {
1709         float4x4 r;
1710 #ifdef MATH_AUTOMATIC_SSE
1711         mat4x4_mul_mat3x4_sse(r.row, this->row, rhs.row);
1712 #else
1713         const float *c0 = rhs.ptr();
1714         const float *c1 = rhs.ptr() + 1;
1715         const float *c2 = rhs.ptr() + 2;
1716         const float *c3 = rhs.ptr() + 3;
1717         r[0][0] = DOT3STRIDED(v[0], c0, 4);
1718         r[0][1] = DOT3STRIDED(v[0], c1, 4);
1719         r[0][2] = DOT3STRIDED(v[0], c2, 4);
1720         r[0][3] = DOT3STRIDED(v[0], c3, 4) + v[0][3];
1721
1722         r[1][0] = DOT3STRIDED(v[1], c0, 4);
1723         r[1][1] = DOT3STRIDED(v[1], c1, 4);
1724         r[1][2] = DOT3STRIDED(v[1], c2, 4);
1725         r[1][3] = DOT3STRIDED(v[1], c3, 4) + v[1][3];
1726
1727         r[2][0] = DOT3STRIDED(v[2], c0, 4);
1728         r[2][1] = DOT3STRIDED(v[2], c1, 4);
1729         r[2][2] = DOT3STRIDED(v[2], c2, 4);
1730         r[2][3] = DOT3STRIDED(v[2], c3, 4) + v[2][3];
1731
1732         r[3][0] = DOT3STRIDED(v[3], c0, 4);
1733         r[3][1] = DOT3STRIDED(v[3], c1, 4);
1734         r[3][2] = DOT3STRIDED(v[3], c2, 4);
1735         r[3][3] = DOT3STRIDED(v[3], c3, 4) + v[3][3];
1736 #endif
1737         return r;
1738 }
1739
1740 float4x4 float4x4::operator *(const float4x4 &rhs) const
1741 {
1742         float4x4 r;
1743 #ifdef MATH_AUTOMATIC_SSE
1744         mat4x4_mul_mat4x4(r.row, this->row, rhs.row);
1745 #else
1746         const float *c0 = rhs.ptr();
1747         const float *c1 = rhs.ptr() + 1;
1748         const float *c2 = rhs.ptr() + 2;
1749         const float *c3 = rhs.ptr() + 3;
1750         r[0][0] = DOT4STRIDED(v[0], c0, 4);
1751         r[0][1] = DOT4STRIDED(v[0], c1, 4);
1752         r[0][2] = DOT4STRIDED(v[0], c2, 4);
1753         r[0][3] = DOT4STRIDED(v[0], c3, 4);
1754
1755         r[1][0] = DOT4STRIDED(v[1], c0, 4);
1756         r[1][1] = DOT4STRIDED(v[1], c1, 4);
1757         r[1][2] = DOT4STRIDED(v[1], c2, 4);
1758         r[1][3] = DOT4STRIDED(v[1], c3, 4);
1759
1760         r[2][0] = DOT4STRIDED(v[2], c0, 4);
1761         r[2][1] = DOT4STRIDED(v[2], c1, 4);
1762         r[2][2] = DOT4STRIDED(v[2], c2, 4);
1763         r[2][3] = DOT4STRIDED(v[2], c3, 4);
1764
1765         r[3][0] = DOT4STRIDED(v[3], c0, 4);
1766         r[3][1] = DOT4STRIDED(v[3], c1, 4);
1767         r[3][2] = DOT4STRIDED(v[3], c2, 4);
1768         r[3][3] = DOT4STRIDED(v[3], c3, 4);
1769 #endif
1770
1771         return r;
1772 }
1773
1774 float4x4 float4x4::operator *(const Quat &rhs) const
1775 {
1776 #ifdef MATH_SIMD
1777         float4x4 rot(rhs);
1778         return *this * rot;
1779 #else
1780         float3x3 rot(rhs);
1781         return *this * rot;
1782 #endif
1783 }
1784
1785 float4 float4x4::operator *(const float4 &rhs) const
1786 {
1787         return this->Transform(rhs);
1788 }
1789
1790 float4x4 float4x4::operator *(float scalar) const
1791 {
1792 #ifdef MATH_AUTOMATIC_SSE
1793         float4x4 r;
1794         mat4x4_mul_float(r.row, row, scalar);
1795 #else
1796         float4x4 r = *this;
1797         r *= scalar;
1798 #endif
1799         return r;
1800 }
1801
1802 float4x4 float4x4::operator /(float scalar) const
1803 {
1804         assume(!EqualAbs(scalar, 0));
1805
1806 #ifdef MATH_AUTOMATIC_SSE
1807         float4x4 r;
1808         mat4x4_div_float(r.row, row, scalar);
1809 #else
1810         float4x4 r = *this;
1811         r /= scalar;
1812 #endif
1813         return r;
1814 }
1815
1816 float4x4 float4x4::operator +(const float4x4 &rhs) const
1817 {
1818 #ifdef MATH_AUTOMATIC_SSE
1819         float4x4 r;
1820         mat4x4_add_mat4x4(r.row, row, rhs.row);
1821 #else
1822         float4x4 r = *this;
1823         r += rhs;
1824 #endif
1825         return r;
1826 }
1827
1828 float4x4 float4x4::operator -(const float4x4 &rhs) const
1829 {
1830 #ifdef MATH_AUTOMATIC_SSE
1831         float4x4 r;
1832         mat4x4_sub_mat4x4(r.row, row, rhs.row);
1833 #else
1834         float4x4 r = *this;
1835         r -= rhs;
1836 #endif
1837         return r;
1838 }
1839
1840 float4x4 float4x4::operator -() const
1841 {
1842 #ifdef MATH_AUTOMATIC_SSE
1843         float4x4 r;
1844         mat4x4_negate(r.row, row);
1845         return r;
1846 #else
1847         return float4x4(-v[0][0], -v[0][1], -v[0][2], -v[0][3],
1848                         -v[1][0], -v[1][1], -v[1][2], -v[1][3],
1849                         -v[2][0], -v[2][1], -v[2][2], -v[2][3],
1850                         -v[3][0], -v[3][1], -v[3][2], -v[3][3]);
1851 #endif
1852 }
1853
1854 float4x4 &float4x4::operator *=(float s)
1855 {
1856 #ifdef MATH_AUTOMATIC_SSE
1857         mat4x4_mul_float(row, row, s);
1858 #else
1859         v[0][0] *= s; v[0][1] *= s; v[0][2] *= s; v[0][3] *= s;
1860         v[1][0] *= s; v[1][1] *= s; v[1][2] *= s; v[1][3] *= s;
1861         v[2][0] *= s; v[2][1] *= s; v[2][2] *= s; v[2][3] *= s;
1862         v[3][0] *= s; v[3][1] *= s; v[3][2] *= s; v[3][3] *= s;
1863 #endif
1864         return *this;
1865 }
1866
1867 float4x4 &float4x4::operator /=(float scalar)
1868 {
1869         assume(!EqualAbs(scalar, 0));
1870         return *this *= (1.f / scalar);
1871 }
1872
1873 float4x4 &float4x4::operator +=(const float4x4 &rhs)
1874 {
1875 #ifdef MATH_AUTOMATIC_SSE
1876         mat4x4_add_mat4x4(row, row, rhs.row);
1877 #else
1878         v[0][0] += rhs.v[0][0]; v[0][1] += rhs.v[0][1]; v[0][2] += rhs.v[0][2]; v[0][3] += rhs.v[0][3];
1879         v[1][0] += rhs.v[1][0]; v[1][1] += rhs.v[1][1]; v[1][2] += rhs.v[1][2]; v[1][3] += rhs.v[1][3];
1880         v[2][0] += rhs.v[2][0]; v[2][1] += rhs.v[2][1]; v[2][2] += rhs.v[2][2]; v[2][3] += rhs.v[2][3];
1881         v[3][0] += rhs.v[3][0]; v[3][1] += rhs.v[3][1]; v[3][2] += rhs.v[3][2]; v[3][3] += rhs.v[3][3];
1882 #endif
1883         return *this;
1884 }
1885
1886 float4x4 &float4x4::operator -=(const float4x4 &rhs)
1887 {
1888 #ifdef MATH_AUTOMATIC_SSE
1889         mat4x4_sub_mat4x4(row, row, rhs.row);
1890 #else
1891         v[0][0] -= rhs.v[0][0]; v[0][1] -= rhs.v[0][1]; v[0][2] -= rhs.v[0][2]; v[0][3] -= rhs.v[0][3];
1892         v[1][0] -= rhs.v[1][0]; v[1][1] -= rhs.v[1][1]; v[1][2] -= rhs.v[1][2]; v[1][3] -= rhs.v[1][3];
1893         v[2][0] -= rhs.v[2][0]; v[2][1] -= rhs.v[2][1]; v[2][2] -= rhs.v[2][2]; v[2][3] -= rhs.v[2][3];
1894         v[3][0] -= rhs.v[3][0]; v[3][1] -= rhs.v[3][1]; v[3][2] -= rhs.v[3][2]; v[3][3] -= rhs.v[3][3];
1895 #endif
1896         return *this;
1897 }
1898
1899 bool float4x4::IsFinite() const
1900 {
1901         for(int y = 0; y < Rows; ++y)
1902                 for(int x = 0; x < Cols; ++x)
1903                         if (!MATH_NS::IsFinite(v[y][x]))
1904                                 return false;
1905         return true;
1906 }
1907
1908 bool float4x4::IsIdentity(float epsilon) const
1909 {
1910         for(int y = 0; y < Rows; ++y)
1911                 for(int x = 0; x < Cols; ++x)
1912                         if (!EqualAbs(v[y][x], (x == y) ? 1.f : 0.f, epsilon))
1913                                 return false;
1914
1915         return true;
1916 }
1917
1918 bool float4x4::IsLowerTriangular(float epsilon) const
1919 {
1920         return EqualAbs(v[0][1], 0.f, epsilon)
1921                 && EqualAbs(v[0][2], 0.f, epsilon)
1922                 && EqualAbs(v[0][3], 0.f, epsilon)
1923                 && EqualAbs(v[1][2], 0.f, epsilon)
1924                 && EqualAbs(v[1][3], 0.f, epsilon)
1925                 && EqualAbs(v[2][3], 0.f, epsilon);
1926 }
1927
1928 bool float4x4::IsUpperTriangular(float epsilon) const
1929 {
1930         return EqualAbs(v[1][0], 0.f, epsilon)
1931                 && EqualAbs(v[2][0], 0.f, epsilon)
1932                 && EqualAbs(v[3][0], 0.f, epsilon)
1933                 && EqualAbs(v[2][1], 0.f, epsilon)
1934                 && EqualAbs(v[3][1], 0.f, epsilon)
1935                 && EqualAbs(v[3][2], 0.f, epsilon);
1936 }
1937
1938 bool float4x4::IsInvertible(float epsilon) const
1939 {
1940         ///@todo Optimize.
1941         float4x4 copy = *this;
1942         return copy.Inverse(epsilon);
1943 }
1944
1945 bool float4x4::IsSymmetric(float epsilon) const
1946 {
1947         for(int y = 0; y < Rows; ++y)
1948                 for(int x = y+1; x < Cols; ++x)
1949                         if (!EqualAbs(v[y][x], v[x][y], epsilon))
1950                                 return false;
1951         return true;
1952 }
1953
1954 bool float4x4::IsSkewSymmetric(float epsilon) const
1955 {
1956         for(int y = 0; y < Rows; ++y)
1957                 for(int x = y; x < Cols; ++x)
1958                         if (!EqualAbs(v[y][x], -v[x][y], epsilon))
1959                                 return false;
1960         return true;
1961 }
1962
1963 bool float4x4::IsIdempotent(float epsilon) const
1964 {
1965         float4x4 m2 = *this * *this;
1966         return this->Equals(m2, epsilon);
1967 }
1968
1969 bool float4x4::HasUnitaryScale(float epsilon) const
1970 {
1971         float3 scale = ExtractScale();
1972         return scale.Equals(1.f, 1.f, 1.f, epsilon);
1973 }
1974
1975 bool float4x4::HasNegativeScale() const
1976 {
1977         return Determinant3() < 0.f;
1978 }
1979
1980 bool float4x4::HasUniformScale(float epsilon) const
1981 {
1982         float3 scale = ExtractScale();
1983         return EqualAbs(scale.x, scale.y, epsilon) && EqualAbs(scale.x, scale.z, epsilon);
1984 }
1985
1986 bool float4x4::IsRowOrthogonal3(float epsilon) const
1987 {
1988         return Row3(0).IsPerpendicular(Row3(1), epsilon)
1989                 && Row3(0).IsPerpendicular(Row3(2), epsilon)
1990                 && Row3(1).IsPerpendicular(Row3(2), epsilon);
1991 }
1992
1993 bool float4x4::IsColOrthogonal3(float epsilon) const
1994 {
1995         return Col3(0).IsPerpendicular(Col3(1), epsilon)
1996                 && Col3(0).IsPerpendicular(Col3(2), epsilon)
1997                 && Col3(1).IsPerpendicular(Col3(2), epsilon);
1998 }
1999
2000 bool float4x4::IsOrthonormal3(float epsilon) const
2001 {
2002         ///@todo Epsilon magnitudes don't match.
2003         return IsColOrthogonal3(epsilon) && Row3(0).IsNormalized(epsilon) && Row3(1).IsNormalized(epsilon) && Row3(2).IsNormalized(epsilon);
2004 }
2005
2006 bool float4x4::Equals(const float4x4 &other, float epsilon) const
2007 {
2008         for(int y = 0; y < Rows; ++y)
2009                 for(int x = 0; x < Cols; ++x)
2010                         if (!EqualAbs(v[y][x], other[y][x], epsilon))
2011                                 return false;
2012         return true;
2013 }
2014
2015 bool float4x4::ContainsProjection(float epsilon) const
2016 {
2017         return Row(3).Equals(0.f, 0.f, 0.f, 1.f, epsilon) == false;
2018 }
2019
2020 #ifdef MATH_ENABLE_STL_SUPPORT
2021 std::string float4x4::ToString() const
2022 {
2023         char str[256];
2024         sprintf(str, "(%.2f, %.2f, %.2f, %.2f) (%.2f, %.2f, %.2f, %.2f) (%.2f, %.2f, %.2f, %.2f) (%.2f, %.2f, %.2f, %.2f)",
2025                 v[0][0], v[0][1], v[0][2], v[0][3],
2026                 v[1][0], v[1][1], v[1][2], v[1][3],
2027                 v[2][0], v[2][1], v[2][2], v[2][3],
2028                 v[3][0], v[3][1], v[3][2], v[3][3]);
2029
2030         return std::string(str);
2031 }
2032
2033 std::string float4x4::SerializeToString() const
2034 {
2035         char str[512];
2036         char *s = SerializeFloat(v[0][0], str); *s = ','; ++s;
2037         s = SerializeFloat(v[0][1], s); *s = ','; ++s;
2038         s = SerializeFloat(v[0][2], s); *s = ','; ++s;
2039         s = SerializeFloat(v[0][3], s); *s = ','; ++s;
2040         s = SerializeFloat(v[1][0], s); *s = ','; ++s;
2041         s = SerializeFloat(v[1][1], s); *s = ','; ++s;
2042         s = SerializeFloat(v[1][2], s); *s = ','; ++s;
2043         s = SerializeFloat(v[1][3], s); *s = ','; ++s;
2044         s = SerializeFloat(v[2][0], s); *s = ','; ++s;
2045         s = SerializeFloat(v[2][1], s); *s = ','; ++s;
2046         s = SerializeFloat(v[2][2], s); *s = ','; ++s;
2047         s = SerializeFloat(v[2][3], s); *s = ','; ++s;
2048         s = SerializeFloat(v[3][0], s); *s = ','; ++s;
2049         s = SerializeFloat(v[3][1], s); *s = ','; ++s;
2050         s = SerializeFloat(v[3][2], s); *s = ','; ++s;
2051         s = SerializeFloat(v[3][3], s);
2052         assert(s+1 - str < 512);
2053         MARK_UNUSED(s);
2054         return str;
2055 }
2056
2057 std::string float4x4::ToString2() const
2058 {
2059         char str[256];
2060         sprintf(str, "float4x4(X:(%.2f,%.2f,%.2f,%.2f) Y:(%.2f,%.2f,%.2f,%.2f) Z:(%.2f,%.2f,%.2f,%.2f), Pos:(%.2f,%.2f,%.2f,%.2f))",
2061                 v[0][0], v[1][0], v[2][0], v[3][0],
2062                 v[0][1], v[1][1], v[2][1], v[3][1],
2063                 v[0][2], v[1][2], v[2][2], v[3][2],
2064                 v[0][3], v[1][3], v[2][3], v[3][3]);
2065
2066         return std::string(str);
2067 }
2068 #endif
2069
2070 float3 float4x4::ToEulerXYX() const { float3 f; ExtractEulerXYX(*this, f[0], f[1], f[2]); return f; }
2071 float3 float4x4::ToEulerXZX() const { float3 f; ExtractEulerXZX(*this, f[0], f[1], f[2]); return f; }
2072 float3 float4x4::ToEulerYXY() const { float3 f; ExtractEulerYXY(*this, f[0], f[1], f[2]); return f; }
2073 float3 float4x4::ToEulerYZY() const { float3 f; ExtractEulerYZY(*this, f[0], f[1], f[2]); return f; }
2074 float3 float4x4::ToEulerZXZ() const { float3 f; ExtractEulerZXZ(*this, f[0], f[1], f[2]); return f; }
2075 float3 float4x4::ToEulerZYZ() const { float3 f; ExtractEulerZYZ(*this, f[0], f[1], f[2]); return f; }
2076 float3 float4x4::ToEulerXYZ() const { float3 f; ExtractEulerXYZ(*this, f[0], f[1], f[2]); return f; }
2077 float3 float4x4::ToEulerXZY() const { float3 f; ExtractEulerXZY(*this, f[0], f[1], f[2]); return f; }
2078 float3 float4x4::ToEulerYXZ() const { float3 f; ExtractEulerYXZ(*this, f[0], f[1], f[2]); return f; }
2079 float3 float4x4::ToEulerYZX() const { float3 f; ExtractEulerYZX(*this, f[0], f[1], f[2]); return f; }
2080 float3 float4x4::ToEulerZXY() const { float3 f; ExtractEulerZXY(*this, f[0], f[1], f[2]); return f; }
2081 float3 float4x4::ToEulerZYX() const { float3 f; ExtractEulerZYX(*this, f[0], f[1], f[2]); return f; }
2082
2083 float3 float4x4::ExtractScale() const
2084 {
2085         return float3(Col3(0).Length(), Col3(1).Length(), Col3(2).Length());
2086 }
2087
2088 void float4x4::Decompose(float3 &translate, Quat &rotate, float3 &scale) const
2089 {
2090         assume(this->IsColOrthogonal3());
2091
2092         float3x3 r;
2093         Decompose(translate, r, scale);
2094         rotate = Quat(r);
2095
2096         // Test that composing back yields the original float4x4.
2097         assume(float4x4::FromTRS(translate, rotate, scale).Equals(*this, 0.1f));
2098 }
2099
2100 void float4x4::Decompose(float3 &translate, float3x3 &rotate, float3 &scale) const
2101 {
2102         assume(this->IsColOrthogonal3());
2103
2104         assume(Row(3).Equals(0,0,0,1));
2105         Float3x4Part().Decompose(translate, rotate, scale);
2106
2107         // Test that composing back yields the original float4x4.
2108         assume(float4x4::FromTRS(translate, rotate, scale).Equals(*this, 0.1f));
2109 }
2110
2111 void float4x4::Decompose(float3 &translate, float3x4 &rotate, float3 &scale) const
2112 {
2113         assume(this->IsColOrthogonal3());
2114
2115         float3x3 r;
2116         Decompose(translate, r, scale);
2117         rotate.SetRotatePart(r);
2118         rotate.SetTranslatePart(0,0,0);
2119
2120         // Test that composing back yields the original float4x4.
2121         assume(float4x4::FromTRS(translate, rotate, scale).Equals(*this, 0.1f));
2122 }
2123
2124 void float4x4::Decompose(float3 &translate, float4x4 &rotate, float3 &scale) const
2125 {
2126         assume(this->IsColOrthogonal3());
2127
2128         float3x3 r;
2129         Decompose(translate, r, scale);
2130         rotate.SetRotatePart(r);
2131         rotate.SetTranslatePart(0,0,0);
2132         rotate.SetRow(3, 0, 0, 0, 1);
2133
2134         // Test that composing back yields the original float4x4.
2135         assume(float4x4::FromTRS(translate, rotate, scale).Equals(*this, 0.1f));
2136 }
2137
2138 float4x4 float4x4::Abs() const
2139 {
2140         float4x4 ret;
2141 #ifdef MATH_AUTOMATIC_SSE
2142         ret.row[0] = abs_ps(row[0]);
2143         ret.row[1] = abs_ps(row[1]);
2144         ret.row[2] = abs_ps(row[2]);
2145         ret.row[3] = abs_ps(row[3]);
2146 #else
2147         for(int y = 0; y < 4; ++y)
2148                 for(int x = 0; x < 4; ++x)
2149                         ret.v[y][x] = MATH_NS::Abs(v[y][x]);
2150 #endif
2151         return ret;
2152 }
2153
2154 #ifdef MATH_ENABLE_STL_SUPPORT
2155 std::ostream &operator <<(std::ostream &out, const float4x4 &rhs)
2156 {
2157         out << rhs.ToString();
2158         return out;
2159 }
2160 #endif
2161
2162 float4x4 operator *(const Quat &lhs, const float4x4 &rhs)
2163 {
2164         float3x3 rot(lhs);
2165         return rot * rhs;
2166 }
2167
2168 float4x4 operator *(const float3x3 &lhs, const float4x4 &rhs)
2169 {
2170         float4x4 r;
2171 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
2172         mat3x3_mul_mat4x4_sse(r.row, lhs.ptr(), rhs.row);
2173 #else
2174         const float *c0 = rhs.ptr();
2175         const float *c1 = rhs.ptr() + 1;
2176         const float *c2 = rhs.ptr() + 2;
2177         const float *c3 = rhs.ptr() + 3;
2178         r[0][0] = DOT3STRIDED(lhs[0], c0, 4);
2179         r[0][1] = DOT3STRIDED(lhs[0], c1, 4);
2180         r[0][2] = DOT3STRIDED(lhs[0], c2, 4);
2181         r[0][3] = DOT3STRIDED(lhs[0], c3, 4);
2182
2183         r[1][0] = DOT3STRIDED(lhs[1], c0, 4);
2184         r[1][1] = DOT3STRIDED(lhs[1], c1, 4);
2185         r[1][2] = DOT3STRIDED(lhs[1], c2, 4);
2186         r[1][3] = DOT3STRIDED(lhs[1], c3, 4);
2187
2188         r[2][0] = DOT3STRIDED(lhs[2], c0, 4);
2189         r[2][1] = DOT3STRIDED(lhs[2], c1, 4);
2190         r[2][2] = DOT3STRIDED(lhs[2], c2, 4);
2191         r[2][3] = DOT3STRIDED(lhs[2], c3, 4);
2192
2193         r[3][0] = rhs[3][0];
2194         r[3][1] = rhs[3][1];
2195         r[3][2] = rhs[3][2];
2196         r[3][3] = rhs[3][3];
2197 #endif
2198         return r;
2199 }
2200
2201 float4x4 operator *(const float3x4 &lhs, const float4x4 &rhs)
2202 {
2203         float4x4 r;
2204 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
2205         mat3x4_mul_mat4x4_sse(r.row, lhs.row, rhs.row);
2206 #else
2207         const float *c0 = rhs.ptr();
2208         const float *c1 = rhs.ptr() + 1;
2209         const float *c2 = rhs.ptr() + 2;
2210         const float *c3 = rhs.ptr() + 3;
2211         r[0][0] = DOT4STRIDED(lhs[0], c0, 4);
2212         r[0][1] = DOT4STRIDED(lhs[0], c1, 4);
2213         r[0][2] = DOT4STRIDED(lhs[0], c2, 4);
2214         r[0][3] = DOT4STRIDED(lhs[0], c3, 4);
2215
2216         r[1][0] = DOT4STRIDED(lhs[1], c0, 4);
2217         r[1][1] = DOT4STRIDED(lhs[1], c1, 4);
2218         r[1][2] = DOT4STRIDED(lhs[1], c2, 4);
2219         r[1][3] = DOT4STRIDED(lhs[1], c3, 4);
2220
2221         r[2][0] = DOT4STRIDED(lhs[2], c0, 4);
2222         r[2][1] = DOT4STRIDED(lhs[2], c1, 4);
2223         r[2][2] = DOT4STRIDED(lhs[2], c2, 4);
2224         r[2][3] = DOT4STRIDED(lhs[2], c3, 4);
2225
2226         r[3][0] = rhs[3][0];
2227         r[3][1] = rhs[3][1];
2228         r[3][2] = rhs[3][2];
2229         r[3][3] = rhs[3][3];
2230 #endif
2231         return r;
2232 }
2233
2234 float4 operator *(const float4 &lhs, const float4x4 &rhs)
2235 {
2236 #ifdef MATH_AUTOMATIC_SSE
2237         return vec4_mul_mat4x4(lhs.v, rhs.row);
2238 #else
2239         return float4(DOT4STRIDED(lhs, rhs.ptr(), 4),
2240                       DOT4STRIDED(lhs, rhs.ptr()+1, 4),
2241                       DOT4STRIDED(lhs, rhs.ptr()+2, 4),
2242                       DOT4STRIDED(lhs, rhs.ptr()+3, 4));
2243 #endif
2244 }
2245
2246 float4x4 float4x4::Mul(const float3x3 &rhs) const { return *this * rhs; }
2247 float4x4 float4x4::Mul(const float3x4 &rhs) const { return *this * rhs; }
2248 float4x4 float4x4::Mul(const float4x4 &rhs) const { return *this * rhs; }
2249 float4x4 float4x4::Mul(const Quat &rhs) const { return *this * rhs; }
2250 float3 float4x4::MulPos(const float3 &pointVector) const { return this->TransformPos(pointVector); }
2251 float3 float4x4::MulDir(const float3 &directionVector) const { return this->TransformDir(directionVector); }
2252 float4 float4x4::Mul(const float4 &vector) const { return *this * vector; }
2253
2254 const float4x4 float4x4::zero    = float4x4(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0);
2255 const float4x4 float4x4::identity = float4x4(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
2256 const float4x4 float4x4::nan = float4x4(FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN);
2257
2258 MATH_END_NAMESPACE