Quat.cpp

1 /* Copyright Jukka Jyl�nki
2
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7        http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License. */
14
15 /** @file Quat.cpp
16         @author Jukka Jyl�nki
17         @brief */
18 #include "Quat.h"
19 #include <stdlib.h>
20 #include "float3.h"
21 #include "float4.h"
22 #include "float3x3.h"
23 #include "float3x4.h"
24 #include "float4x4.h"
25 #include "../Algorithm/Random/LCG.h"
26 #include "assume.h"
27 #include "MathFunc.h"
28 #include "SSEMath.h"
29 #include "float4x4_sse.h"
30 #include "quat_simd.h"
31 #include "float4_neon.h"
32
33 #ifdef MATH_AUTOMATIC_SSE
34 #include "sse_mathfun.h"
35 #endif
36
37 #ifdef MATH_ENABLE_STL_SUPPORT
38 #include <iostream>
39 #endif
40
41 MATH_BEGIN_NAMESPACE
42
43 Quat::Quat(const float *data)
44 {
45         assume(data);
46 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
47         if (!data)
48                 return;
49 #endif
50 #if defined(MATH_AUTOMATIC_SSE)
51         q = loadu_ps(data);
52 #else
53         x = data[0];
54         y = data[1];
55         z = data[2];
56         w = data[3];
57 #endif
58 }
59
60 Quat::Quat(float x_, float y_, float z_, float w_)
61 #if !defined(MATH_AUTOMATIC_SSE)
62 :x(x_), y(y_), z(z_), w(w_)
63 #endif
64 {
65 #if defined(MATH_AUTOMATIC_SSE)
66         q = set_ps(w_, z_, y_, x_);
67 #endif
68 }
69
70 vec Quat::WorldX() const
71 {
72 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
73         return FLOAT4_TO_DIR(quat_transform_vec4(q, float4::unitX));
74 #else
75         return DIR_VEC(this->Transform(1.f, 0.f, 0.f));
76 #endif
77 }
78
79 vec Quat::WorldY() const
80 {
81 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
82         return FLOAT4_TO_DIR(quat_transform_vec4(q, float4::unitY));
83 #else
84         return DIR_VEC(this->Transform(0.f, 1.f, 0.f));
85 #endif
86 }
87
88 vec Quat::WorldZ() const
89 {
90 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
91         return FLOAT4_TO_DIR(quat_transform_vec4(q, float4::unitZ));
92 #else
93         return DIR_VEC(this->Transform(0.f, 0.f, 1.f));
94 #endif
95 }
96
97 vec Quat::Axis() const
98 {
99         assume2(this->IsNormalized(), *this, this->Length());
100 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
101         // Best: 6.145 nsecs / 16.88 ticks, Avg: 6.367 nsecs, Worst: 6.529 nsecs
102         assume2(this->IsNormalized(), *this, this->Length());
103         simd4f cosAngle = wwww_ps(q);
104         simd4f rcpSinAngle = rsqrt_ps(sub_ps(set1_ps(1.f), mul_ps(cosAngle, cosAngle)));
105         simd4f a = mul_ps(q, rcpSinAngle);
106
107         // Set the w component to zero.
108         simd4f highPart = _mm_unpackhi_ps(a, zero_ps()); // [_ _ 0 z]
109         a = _mm_movelh_ps(a, highPart); // [0 z y x]
110         return FLOAT4_TO_DIR(a);
111 #else
112         // Best: 6.529 nsecs / 18.152 ticks, Avg: 6.851 nsecs, Worst: 8.065 nsecs
113
114         // Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction,
115         // since we are about to divide by it.
116         float rcpSinAngle = RSqrt(1.f - w*w);
117         return DIR_VEC(x, y, z) * rcpSinAngle;
118 #endif
119 }
120
121 float Quat::Angle() const
122 {
123         return Acos(w) * 2.f;
124 }
125
126 float Quat::Dot(const Quat &rhs) const
127 {
128 #ifdef MATH_AUTOMATIC_SSE
129         return dot4_float(q, rhs.q);
130 #else
131         return x*rhs.x + y*rhs.y + z*rhs.z + w*rhs.w;
132 #endif
133 }
134
135 float Quat::LengthSq() const
136 {
137 #ifdef MATH_AUTOMATIC_SSE
138         return vec4_length_sq_float(q);
139 #else
140         return x*x + y*y + z*z + w*w;
141 #endif
142 }
143
144 float Quat::Length() const
145 {
146 #ifdef MATH_AUTOMATIC_SSE
147         return vec4_length_float(q);
148 #else
149         return Sqrt(LengthSq());
150 #endif
151 }
152
153 float Quat::Normalize()
154 {
155 #ifdef MATH_AUTOMATIC_SSE
156         simd4f lenSq = vec4_length_sq_ps(q);
157         simd4f len = vec4_rsqrt(lenSq);
158         simd4f isZero = cmplt_ps(lenSq, simd4fEpsilon); // Was the length zero?
159         simd4f normalized = mul_ps(q, len); // Normalize.
160         q = cmov_ps(normalized, float4::unitX.v, isZero); // If length == 0, output the vector (1,0,0,0).
161         return s4f_x(len);
162 #else
163         float length = Length();
164         if (length < 1e-4f)
165                 return 0.f;
166         float rcpLength = 1.f / length;
167         x *= rcpLength;
168         y *= rcpLength;
169         z *= rcpLength;
170         w *= rcpLength;
171         return length;
172 #endif
173 }
174
175 Quat Quat::Normalized() const
176 {
177 #ifdef MATH_AUTOMATIC_SSE
178         return Quat(vec4_normalize(q));
179 #else
180         Quat copy = *this;
181         float success = copy.Normalize();
182         assume(success > 0 && "Quat::Normalized failed!");
183         MARK_UNUSED(success);
184         return copy;
185 #endif
186 }
187
188 bool Quat::IsNormalized(float epsilonSq) const
189 {
190         return EqualAbs(LengthSq(), 1.f, epsilonSq);
191 }
192
193 bool Quat::IsInvertible(float epsilon) const
194 {
195         return LengthSq() > epsilon && IsFinite();
196 }
197
198 bool Quat::IsFinite() const
199 {
200         return MATH_NS::IsFinite(x) && MATH_NS::IsFinite(y) && MATH_NS::IsFinite(z) && MATH_NS::IsFinite(w);
201 }
202
203 bool Quat::Equals(const Quat &rhs, float epsilon) const
204 {
205         return EqualAbs(x, rhs.x, epsilon) && EqualAbs(y, rhs.y, epsilon) && EqualAbs(z, rhs.z, epsilon) && EqualAbs(w, rhs.w, epsilon);
206 }
207
208 bool Quat::BitEquals(const Quat &other) const
209 {
210         return ReinterpretAsU32(x) == ReinterpretAsU32(other.x) &&
211                 ReinterpretAsU32(y) == ReinterpretAsU32(other.y) &&
212                 ReinterpretAsU32(z) == ReinterpretAsU32(other.z) &&
213                 ReinterpretAsU32(w) == ReinterpretAsU32(other.w);
214 }
215
216 void Quat::Inverse()
217 {
218         assume(IsNormalized());
219         assume(IsInvertible());
220         Conjugate();
221 }
222
223 Quat MUST_USE_RESULT Quat::Inverted() const
224 {
225         assume(IsNormalized());
226         assume(IsInvertible());
227         return Conjugated();
228 }
229
230 float Quat::InverseAndNormalize()
231 {
232         Conjugate();
233         return Normalize();
234 }
235
236 void Quat::Conjugate()
237 {
238 #ifdef MATH_AUTOMATIC_SSE
239         q = negate3_ps(q);
240 #else
241         x = -x;
242         y = -y;
243         z = -z;
244 #endif
245 }
246
247 Quat MUST_USE_RESULT Quat::Conjugated() const
248 {
249 #ifdef MATH_AUTOMATIC_SSE
250         return negate3_ps(q);
251 #else
252         return Quat(-x, -y, -z, w);
253 #endif
254 }
255
256 float3 MUST_USE_RESULT Quat::Transform(const float3 &vec) const
257 {
258         assume2(this->IsNormalized(), *this, this->LengthSq());
259 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
260         return float4(quat_transform_vec4(q, load_vec3(vec.ptr(), 0.f))).xyz();
261 #else
262         ///\todo Optimize/benchmark the scalar path not to generate a matrix!
263         float3x3 mat = this->ToFloat3x3();
264         return mat * vec;
265 #endif
266 }
267
268 float3 MUST_USE_RESULT Quat::Transform(float x, float y, float z) const
269 {
270 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
271         return float4(quat_transform_vec4(q, set_ps(0.f, z, y, x))).xyz();
272 #else
273         return Transform(float3(x, y, z));
274 #endif
275 }
276
277 float4 MUST_USE_RESULT Quat::Transform(const float4 &vec) const
278 {
279         assume(vec.IsWZeroOrOne());
280
281 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
282         return quat_transform_vec4(q, vec);
283 #else
284         return float4(Transform(vec.x, vec.y, vec.z), vec.w);
285 #endif
286 }
287
288 Quat MUST_USE_RESULT Quat::Lerp(const Quat &b, float t) const
289 {
290         assume(0.f <= t && t <= 1.f);
291
292 #ifdef MATH_AUTOMATIC_SSE
293         return vec4_lerp(q, b.q, t);
294 #else
295         return *this * (1.f - t) + b * t;
296 #endif
297 }
298
299 /** Implementation based on the math in the book Watt, Policarpo. 3D Games: Real-time rendering and Software Technology, pp. 383-386. */
300 Quat MUST_USE_RESULT Quat::Slerp(const Quat &q2, float t) const
301 {
302         ///\todo SSE.
303         assume(0.f <= t && t <= 1.f);
304         assume(IsNormalized());
305         assume(q2.IsNormalized());
306
307         float angle = this->Dot(q2);
308         float sign = 1.f; // Multiply by a sign of +/-1 to guarantee we rotate the shorter arc.
309         if (angle < 0.f)
310         {
311                 angle = -angle;
312                 sign = -1.f;
313         }
314
315         float a;
316         float b;
317         if (angle <= 0.97f) // perform spherical linear interpolation.
318         {
319                 angle = Acos(angle); // After this, angle is in the range pi/2 -> 0 as the original angle variable ranged from 0 -> 1.
320
321                 float angleT = t*angle;
322
323 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE2)
324                 // Compute three sines in one go with SSE.
325                 simd4f s = set_ps(0.f, angleT, angle - angleT, angle);
326                 s = sin_ps(s);
327                 simd4f denom = xxxx_ps(s);
328                 s = div_ps(s, denom);
329                 a = s4f_y(s);
330                 b = s4f_z(s);
331 #else
332                 float s[3] = { Sin(angle), Sin(angle - angleT), Sin(angleT) };
333                 float c = 1.f / s[0];
334                 a = s[1] * c;
335                 b = s[2] * c;
336 #endif
337         }
338         else // If angle is close to taking the denominator to zero, resort to linear interpolation (and normalization).
339         {
340                 a = 1.f - t;
341                 b = t;
342         }
343
344         return (*this * (a * sign) + q2 * b).Normalized();
345 }
346
347 float3 MUST_USE_RESULT Quat::SlerpVector(const float3 &from, const float3 &to, float t)
348 {
349         if (t <= 0.f)
350                 return from;
351         if (t >= 1.f)
352                 return to;
353         ///\todo The following chain can be greatly optimized.
354         Quat q = Quat::RotateFromTo(from, to);
355         q = Slerp(Quat::identity, q, t);
356         return q.Transform(from);
357 }
358
359 float3 MUST_USE_RESULT Quat::SlerpVectorAbs(const float3 &from, const float3 &to, float angleRadians)
360 {
361         if (angleRadians <= 0.f)
362                 return from;
363         Quat q = Quat::RotateFromTo(from, to);
364         float a = q.Angle();
365         if (a <= angleRadians)
366                 return to;
367         float t = angleRadians / a;
368         q = Slerp(Quat::identity, q, t);
369         return q.Transform(from);
370 }
371
372 float MUST_USE_RESULT Quat::AngleBetween(const Quat &target) const
373 {
374         assume(this->IsInvertible());
375         Quat q = target / *this;
376         return q.Angle();
377 }
378
379 vec MUST_USE_RESULT Quat::AxisFromTo(const Quat &target) const
380 {
381         assume(this->IsInvertible());
382         Quat q = target / *this;
383         return q.Axis();
384 }
385
386 void Quat::ToAxisAngle(float3 &axis, float &angle) const
387 {
388         // Best: 36.868 nsecs / 98.752 ticks, Avg: 37.095 nsecs, Worst: 37.636 nsecs
389         assume2(this->IsNormalized(), *this, this->Length());
390         float halfAngle = Acos(w);
391         angle = halfAngle * 2.f;
392         // Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction,
393         // since we are about to divide by it.
394         float rcpSinAngle = RSqrt(1.f - w*w);
395         axis.x = x * rcpSinAngle;
396         axis.y = y * rcpSinAngle;
397         axis.z = z * rcpSinAngle;
398 }
399
400 void Quat::ToAxisAngle(float4 &axis, float &angle) const
401 {
402 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
403         // Best: 35.332 nsecs / 94.328 ticks, Avg: 35.870 nsecs, Worst: 57.607 nsecs
404         assume2(this->IsNormalized(), *this, this->Length());
405         simd4f cosAngle = wwww_ps(q);
406         simd4f rcpSinAngle = rsqrt_ps(sub_ps(set1_ps(1.f), mul_ps(cosAngle, cosAngle)));
407         angle = Acos(s4f_x(cosAngle)) * 2.f;
408         simd4f a = mul_ps(q, rcpSinAngle);
409
410         // Set the w component to zero.
411         simd4f highPart = _mm_unpackhi_ps(a, zero_ps()); // [_ _ 0 z]
412         axis.v = _mm_movelh_ps(a, highPart); // [0 z y x]
413 #else
414         // Best: 85.258 nsecs / 227.656 ticks, Avg: 85.492 nsecs, Worst: 86.410 nsecs
415         ToAxisAngle(reinterpret_cast<float3&>(axis), angle);
416         axis.w = 0.f;
417 #endif
418 }
419
420 void Quat::SetFromAxisAngle(const float3 &axis, float angle)
421 {
422 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
423         SetFromAxisAngle(load_vec3(axis.ptr(), 0.f), angle);
424 #else
425         assume1(axis.IsNormalized(), axis);
426         assume1(MATH_NS::IsFinite(angle), angle);
427         float sinz, cosz;
428         SinCos(angle*0.5f, sinz, cosz);
429         x = axis.x * sinz;
430         y = axis.y * sinz;
431         z = axis.z * sinz;
432         w = cosz;
433 #endif
434 }
435
436 void Quat::SetFromAxisAngle(const float4 &axis, float angle)
437 {
438         assume1(EqualAbs(axis.w, 0.f), axis);
439         assume2(axis.IsNormalized(1e-4f), axis, axis.Length4());
440         assume1(MATH_NS::IsFinite(angle), angle);
441
442 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE2)
443         // Best: 26.499 nsecs / 71.024 ticks, Avg: 26.856 nsecs, Worst: 27.651 nsecs
444         simd4f half = set1_ps(0.5f);
445         simd4f halfAngle = mul_ps(set1_ps(angle), half);
446         simd4f sinAngle, cosAngle;
447         sincos_ps(halfAngle, &sinAngle, &cosAngle);
448         simd4f quat = mul_ps(axis, sinAngle);
449
450         // Set the w component to cosAngle.
451         simd4f highPart = _mm_unpackhi_ps(quat, cosAngle); // [_ _ 1 z]
452         q = _mm_movelh_ps(quat, highPart); // [1 z y x]
453 #else
454         // Best: 36.868 nsecs / 98.312 ticks, Avg: 36.980 nsecs, Worst: 41.477 nsecs
455         SetFromAxisAngle(axis.xyz(), angle);
456 #endif
457 }
458
459 /// See Schneider, Eberly. Geometric Tools for Computer Graphics, p. 861.
460 template<typename M>
461 void SetQuatFrom(Quat &q, const M &m)
462 {
463         // The rotation matrix is of form: (Eric Lengyel's Mathematics for 3D Game Programming and Computer Graphics 2nd ed., p. 92)
464         // 1 - 2y^2 - 2z^2        2xy - 2wz            2xz + 2wy
465         //    2xy + 2wz        1 - 2x^2 - 2z^2         2yz - 2wx
466         //    2xz - 2wy           2yz + 2wx         1 - 2x^2 - 2y^2
467
468         float r = m[0][0] + m[1][1] + m[2][2]; // The element w is easiest picked up as a sum of the diagonals.
469         // Above, r == 3 - 4(x^2+y^2+z^2) == 4(1-x^2-y^2-z^2) - 1 == 4*w^2 - 1.
470         if (r > 0) // In this case, |w| > 1/2.
471         {
472                 q.w = Sqrt(r + 1.f) * 0.5f; // We have two choices for the sign of w, arbitrarily pick the positive.
473                 float inv4w = 1.f / (4.f * q.w);
474                 q.x = (m[2][1] - m[1][2]) * inv4w;
475                 q.y = (m[0][2] - m[2][0]) * inv4w;
476                 q.z = (m[1][0] - m[0][1]) * inv4w;
477         }
478         else if (m[0][0] > m[1][1] && m[0][0] > m[2][2]) // If |q.x| is larger than |q.y| and |q.z|, extract it first. This gives
479         {                                                // best stability, and we know below x can't be zero.
480                 q.x = Sqrt(1.f + m[0][0] - m[1][1] - m[2][2]) * 0.5f; // We have two choices for the sign of x, arbitrarily pick the positive.
481                 const float x4 = 1.f / (4.f * q.x);
482                 q.y = (m[0][1] + m[1][0]) * x4;
483                 q.z = (m[0][2] + m[2][0]) * x4;
484                 q.w = (m[2][1] - m[1][2]) * x4;
485         }
486         else if (m[1][1] > m[2][2]) // |q.y| is larger than |q.x| and |q.z|
487         {
488                 q.y = Sqrt(1.f + m[1][1] - m[0][0] - m[2][2]) * 0.5f; // We have two choices for the sign of y, arbitrarily pick the positive.
489                 const float y4 = 1.f / (4.f * q.y);
490                 q.x = (m[0][1] + m[1][0]) * y4;
491                 q.z = (m[1][2] + m[2][1]) * y4;
492                 q.w = (m[0][2] - m[2][0]) * y4;
493         }
494         else // |q.z| is larger than |q.x| or |q.y|
495         {
496                 q.z = Sqrt(1.f + m[2][2] - m[0][0] - m[1][1]) * 0.5f; // We have two choices for the sign of z, arbitrarily pick the positive.
497                 const float z4 = 1.f / (4.f * q.z);
498                 q.x = (m[0][2] + m[2][0]) * z4;
499                 q.y = (m[1][2] + m[2][1]) * z4;
500                 q.w = (m[1][0] - m[0][1]) * z4;
501         }
502         float oldLength = q.Normalize();
503         assume(oldLength > 0.f);
504         MARK_UNUSED(oldLength);
505 }
506
507 void Quat::Set(const float3x3 &m)
508 {
509         assume(m.IsColOrthogonal());
510         assume(m.HasUnitaryScale());
511         assume(!m.HasNegativeScale());
512         SetQuatFrom(*this, m);
513
514 #ifdef MATH_ASSERT_CORRECTNESS
515         // Test that the conversion float3x3->Quat->float3x3 is correct.
516         mathassert(this->ToFloat3x3().Equals(m, 0.01f));
517 #endif
518 }
519
520 void Quat::Set(const float3x4 &m)
521 {
522         assume(m.IsColOrthogonal());
523         assume(m.HasUnitaryScale());
524         assume(!m.HasNegativeScale());
525         SetQuatFrom(*this, m);
526
527 #ifdef MATH_ASSERT_CORRECTNESS
528         // Test that the conversion float3x3->Quat->float3x3 is correct.
529         mathassert(this->ToFloat3x3().Equals(m.Float3x3Part(), 0.01f));
530 #endif
531 }
532
533 void Quat::Set(const float4x4 &m)
534 {
535         assume(m.IsColOrthogonal3());
536         assume(m.HasUnitaryScale());
537         assume(!m.HasNegativeScale());
538         assume(m.Row(3).Equals(0,0,0,1));
539         SetQuatFrom(*this, m);
540
541 #ifdef MATH_ASSERT_CORRECTNESS
542         // Test that the conversion float3x3->Quat->float3x3 is correct.
543         mathassert(this->ToFloat3x3().Equals(m.Float3x3Part(), 0.01f));
544 #endif
545 }
546
547 void Quat::Set(float x_, float y_, float z_, float w_)
548 {
549 #ifdef MATH_AUTOMATIC_SSE
550         q = set_ps(w_, z_, y_, x_);
551 #else
552         x = x_;
553         y = y_;
554         z = z_;
555         w = w_;
556 #endif
557 }
558
559 Quat MUST_USE_RESULT Quat::LookAt(const float3 &localForward, const float3 &targetDirection, const float3 &localUp, const float3 &worldUp)
560 {
561         return float3x3::LookAt(localForward, targetDirection, localUp, worldUp).ToQuat();
562 }
563
564 Quat MUST_USE_RESULT Quat::RotateX(float angle)
565 {
566         return Quat(float3(1,0,0), angle);
567 }
568
569 Quat MUST_USE_RESULT Quat::RotateY(float angle)
570 {
571         return Quat(float3(0,1,0), angle);
572 }
573
574 Quat MUST_USE_RESULT Quat::RotateZ(float angle)
575 {
576         return Quat(float3(0,0,1), angle);
577 }
578
579 Quat MUST_USE_RESULT Quat::RotateAxisAngle(const float3 &axis, float angle)
580 {
581         return Quat(axis, angle);
582 }
583
584 Quat MUST_USE_RESULT Quat::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection)
585 {
586         assume(sourceDirection.IsNormalized());
587         assume(targetDirection.IsNormalized());
588         // If sourceDirection == targetDirection, the cross product comes out zero, and normalization would fail. In that case, pick an arbitrary axis.
589         float3 axis = sourceDirection.Cross(targetDirection);
590         float oldLength = axis.Normalize();
591         if (oldLength != 0.f)
592         {
593                 float halfCosAngle = 0.5f*sourceDirection.Dot(targetDirection);
594                 float cosHalfAngle = Sqrt(0.5f + halfCosAngle);
595                 float sinHalfAngle = Sqrt(0.5f - halfCosAngle);
596                 return Quat(axis.x * sinHalfAngle, axis.y * sinHalfAngle, axis.z * sinHalfAngle, cosHalfAngle);
597         }
598         else
599                 return Quat(1.f, 0.f, 0.f, 0.f);
600
601 }
602
603 Quat MUST_USE_RESULT Quat::RotateFromTo(const float4 &sourceDirection, const float4 &targetDirection)
604 {
605 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
606         // Best: 12.289 nsecs / 33.144 ticks, Avg: 12.489 nsecs, Worst: 14.210 nsecs
607         simd4f cosAngle = dot4_ps(sourceDirection.v, targetDirection.v);
608         cosAngle = negate3_ps(cosAngle); // [+ - - -]
609         // XYZ channels use the trigonometric formula sin(x/2) = +/-sqrt(0.5-0.5*cosx))
610         // The W channel uses the trigonometric formula cos(x/2) = +/-sqrt(0.5+0.5*cosx))
611         simd4f half = set1_ps(0.5f);
612         simd4f cosSinHalfAngle = sqrt_ps(add_ps(half, mul_ps(half, cosAngle))); // [cos(x/2), sin(x/2), sin(x/2), sin(x/2)]
613         simd4f axis = cross_ps(sourceDirection.v, targetDirection.v);
614         simd4f recipLen = rsqrt_ps(dot4_ps(axis, axis));
615         axis = mul_ps(axis, recipLen); // [0 z y x]
616         // Set the w component to one.
617         simd4f one = add_ps(half, half); // [1 1 1 1]
618         simd4f highPart = _mm_unpackhi_ps(axis, one); // [_ _ 1 z]
619         axis = _mm_movelh_ps(axis, highPart); // [1 z y x]
620         Quat q;
621         q.q = mul_ps(axis, cosSinHalfAngle);
622         return q;
623 #else
624         // Best: 19.970 nsecs / 53.632 ticks, Avg: 20.197 nsecs, Worst: 21.122 nsecs
625         assume(EqualAbs(sourceDirection.w, 0.f));
626         assume(EqualAbs(targetDirection.w, 0.f));
627         return Quat::RotateFromTo(sourceDirection.xyz(), targetDirection.xyz());
628 #endif
629 }
630
631 Quat MUST_USE_RESULT Quat::RotateFromTo(const float3 &sourceDirection, const float3 &targetDirection,
632         const float3 &sourceDirection2, const float3 &targetDirection2)
633 {
634         return LookAt(sourceDirection, targetDirection, sourceDirection2, targetDirection2);
635 }
636
637 Quat MUST_USE_RESULT Quat::FromEulerXYX(float x2, float y, float x) { return (Quat::RotateX(x2) * Quat::RotateY(y) * Quat::RotateX(x)).Normalized(); }
638 Quat MUST_USE_RESULT Quat::FromEulerXZX(float x2, float z, float x) { return (Quat::RotateX(x2) * Quat::RotateZ(z) * Quat::RotateX(x)).Normalized(); }
639 Quat MUST_USE_RESULT Quat::FromEulerYXY(float y2, float x, float y) { return (Quat::RotateY(y2) * Quat::RotateX(x) * Quat::RotateY(y)).Normalized(); }
640 Quat MUST_USE_RESULT Quat::FromEulerYZY(float y2, float z, float y) { return (Quat::RotateY(y2) * Quat::RotateZ(z) * Quat::RotateY(y)).Normalized(); }
641 Quat MUST_USE_RESULT Quat::FromEulerZXZ(float z2, float x, float z) { return (Quat::RotateZ(z2) * Quat::RotateX(x) * Quat::RotateZ(z)).Normalized(); }
642 Quat MUST_USE_RESULT Quat::FromEulerZYZ(float z2, float y, float z) { return (Quat::RotateZ(z2) * Quat::RotateY(y) * Quat::RotateZ(z)).Normalized(); }
643 Quat MUST_USE_RESULT Quat::FromEulerXYZ(float x, float y, float z) { return (Quat::RotateX(x) * Quat::RotateY(y) * Quat::RotateZ(z)).Normalized(); }
644 Quat MUST_USE_RESULT Quat::FromEulerXZY(float x, float z, float y) { return (Quat::RotateX(x) * Quat::RotateZ(z) * Quat::RotateY(y)).Normalized(); }
645 Quat MUST_USE_RESULT Quat::FromEulerYXZ(float y, float x, float z) { return (Quat::RotateY(y) * Quat::RotateX(x) * Quat::RotateZ(z)).Normalized(); }
646 Quat MUST_USE_RESULT Quat::FromEulerYZX(float y, float z, float x) { return (Quat::RotateY(y) * Quat::RotateZ(z) * Quat::RotateX(x)).Normalized(); }
647 Quat MUST_USE_RESULT Quat::FromEulerZXY(float z, float x, float y) { return (Quat::RotateZ(z) * Quat::RotateX(x) * Quat::RotateY(y)).Normalized(); }
648 Quat MUST_USE_RESULT Quat::FromEulerZYX(float z, float y, float x) { return (Quat::RotateZ(z) * Quat::RotateY(y) * Quat::RotateX(x)).Normalized(); }
649
650 Quat MUST_USE_RESULT Quat::RandomRotation(LCG &lcg)
651 {
652         // Generate a random point on the 4D unitary hypersphere using the rejection sampling method.
653         for(int i = 0; i < 1000; ++i)
654         {
655                 float x = lcg.Float(-1, 1);
656                 float y = lcg.Float(-1, 1);
657                 float z = lcg.Float(-1, 1);
658                 float w = lcg.Float(-1, 1);
659                 float lenSq = x*x + y*y + z*z + w*w;
660                 if (lenSq >= 1e-6f && lenSq <= 1.f)
661                         return Quat(x, y, z, w) / Sqrt(lenSq);
662         }
663         assume(false && "Quat::RandomRotation failed!");
664         return Quat::identity;
665 }
666
667 ///@todo the following could be heavily optimized. Don't route through float3x3 conversion.
668
669 float3 MUST_USE_RESULT Quat::ToEulerXYX() const { return ToFloat3x3().ToEulerXYX(); }
670 float3 MUST_USE_RESULT Quat::ToEulerXZX() const { return ToFloat3x3().ToEulerXZX(); }
671 float3 MUST_USE_RESULT Quat::ToEulerYXY() const { return ToFloat3x3().ToEulerYXY(); }
672 float3 MUST_USE_RESULT Quat::ToEulerYZY() const { return ToFloat3x3().ToEulerYZY(); }
673 float3 MUST_USE_RESULT Quat::ToEulerZXZ() const { return ToFloat3x3().ToEulerZXZ(); }
674 float3 MUST_USE_RESULT Quat::ToEulerZYZ() const { return ToFloat3x3().ToEulerZYZ(); }
675 float3 MUST_USE_RESULT Quat::ToEulerXYZ() const { return ToFloat3x3().ToEulerXYZ(); }
676 float3 MUST_USE_RESULT Quat::ToEulerXZY() const { return ToFloat3x3().ToEulerXZY(); }
677 float3 MUST_USE_RESULT Quat::ToEulerYXZ() const { return ToFloat3x3().ToEulerYXZ(); }
678 float3 MUST_USE_RESULT Quat::ToEulerYZX() const { return ToFloat3x3().ToEulerYZX(); }
679 float3 MUST_USE_RESULT Quat::ToEulerZXY() const { return ToFloat3x3().ToEulerZXY(); }
680 float3 MUST_USE_RESULT Quat::ToEulerZYX() const { return ToFloat3x3().ToEulerZYX(); }
681
682 float3x3 MUST_USE_RESULT Quat::ToFloat3x3() const
683 {
684         return float3x3(*this);
685 }
686
687 float3x4 MUST_USE_RESULT Quat::ToFloat3x4() const
688 {
689 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
690         float3x4 m;
691         quat_to_mat3x4(q, _mm_set_ps(1,0,0,0), m.row);
692         return m;
693 #else
694         return float3x4(*this);
695 #endif
696 }
697
698 float4x4 MUST_USE_RESULT Quat::ToFloat4x4() const
699 {
700         assume(IsNormalized());
701 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
702         float4x4 m;
703         quat_to_mat4x4(q, _mm_set_ps(1,0,0,0), m.row);
704         return m;
705 #else
706         return float4x4(*this);
707 #endif
708 }
709
710 float4x4 MUST_USE_RESULT Quat::ToFloat4x4(const float3 &translation) const
711 {
712 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
713         return ToFloat4x4(float4(translation, 1.f));
714 #else
715         return float4x4(*this, translation);
716 #endif
717 }
718
719 float4x4 MUST_USE_RESULT Quat::ToFloat4x4(const float4 &translation) const
720 {
721         assume(IsNormalized());
722 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
723         float4x4 m;
724         quat_to_mat4x4(q, translation.v, m.row);
725         return m;
726 #else
727         return float4x4(*this, translation.xyz());
728 #endif
729 }
730
731 bool IsNeutralCLocale();
732
733 #ifdef MATH_ENABLE_STL_SUPPORT
734 std::string MUST_USE_RESULT Quat::ToString() const
735 {
736         char str[256];
737         sprintf(str, "(%.3f, %.3f, %.3f, %.3f)", x, y, z, w);
738         return str;
739 }
740
741 std::string MUST_USE_RESULT Quat::ToString2() const
742 {
743         float3 axis;
744         float angle;
745         ToAxisAngle(axis, angle);
746         char str[256];
747         sprintf(str, "Quat(axis:(%.2f,%.2f,%.2f) angle:%2.f)", axis.x, axis.y, axis.z, RadToDeg(angle));
748         return str;
749 }
750
751 std::string MUST_USE_RESULT Quat::SerializeToString() const
752 {
753         char str[256];
754         char *s = SerializeFloat(x, str); *s = ','; ++s;
755         s = SerializeFloat(y, s); *s = ','; ++s;
756         s = SerializeFloat(z, s); *s = ','; ++s;
757         s = SerializeFloat(w, s);
758         assert(s+1 - str < 256);
759         MARK_UNUSED(s);
760         return str;
761 }
762
763 std::string Quat::SerializeToCodeString() const
764 {
765         return "Quat(" + SerializeToString() + ")";
766 }
767 #endif
768
769 Quat MUST_USE_RESULT Quat::FromString(const char *str, const char **outEndStr)
770 {
771         assert(IsNeutralCLocale());
772         assume(str);
773         if (!str)
774                 return Quat::nan;
775         MATH_SKIP_WORD(str, "Quat");
776         MATH_SKIP_WORD(str, "(");
777         Quat f;
778         f.x = DeserializeFloat(str, &str);
779         f.y = DeserializeFloat(str, &str);
780         f.z = DeserializeFloat(str, &str);
781         f.w = DeserializeFloat(str, &str);
782         if (*str == ')')
783                 ++str;
784         if (*str == ',')
785                 ++str;
786         if (outEndStr)
787                 *outEndStr = str;
788         return f;
789 }
790
791 Quat Quat::operator +(const Quat &rhs) const
792 {
793 #ifdef MATH_AUTOMATIC_SSE
794         return add_ps(q, rhs.q);
795 #else
796         return Quat(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
797 #endif
798 }
799
800 Quat Quat::operator -(const Quat &rhs) const
801 {
802 #ifdef MATH_AUTOMATIC_SSE
803         return sub_ps(q, rhs.q);
804 #else
805         return Quat(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
806 #endif
807 }
808
809 Quat Quat::operator -() const
810 {
811 #ifdef MATH_AUTOMATIC_SSE
812         return negate_ps(q);
813 #else
814         return Quat(-x, -y, -z, -w);
815 #endif
816 }
817
818 Quat Quat::operator *(float scalar) const
819 {
820 #ifdef MATH_AUTOMATIC_SSE
821         return vec4_mul_float(q, scalar);
822 #else
823         return Quat(x * scalar, y * scalar, z * scalar, w * scalar);
824 #endif
825 }
826
827 float3 Quat::operator *(const float3 &rhs) const
828 {
829 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
830         return float4(quat_transform_vec4(q, float4(rhs, 0.f).v)).xyz();
831 #else
832         return Transform(rhs);
833 #endif
834 }
835
836 float4 Quat::operator *(const float4 &rhs) const
837 {
838 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
839         return quat_transform_vec4(q, rhs);
840 #else
841         return Transform(rhs);
842 #endif
843 }
844
845 Quat Quat::operator /(float scalar) const
846 {
847         assume(!EqualAbs(scalar, 0.f));
848
849 #ifdef MATH_AUTOMATIC_SSE
850         return vec4_div_float(q, scalar);
851 #else
852         return *this * (1.f / scalar);
853 #endif
854 }
855
856 Quat Quat::operator *(const Quat &r) const
857 {
858 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
859         // Best: 3.456 nsecs / 9.752 ticks, Avg: 3.721 nsecs, Worst: 3.840 nsecs
860         return quat_mul_quat(q, r.q);
861 #else
862         // Best: 12.289 nsecs / 33.216 ticks, Avg: 12.585 nsecs, Worst: 13.442 nsecs
863         return Quat(x*r.w + y*r.z - z*r.y + w*r.x,
864                    -x*r.z + y*r.w + z*r.x + w*r.y,
865                     x*r.y - y*r.x + z*r.w + w*r.z,
866                    -x*r.x - y*r.y - z*r.z + w*r.w);
867 #endif
868 }
869
870 Quat Quat::operator /(const Quat &r) const
871 {
872 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
873         return quat_div_quat(q, r.q);
874 #else
875         return Quat(x*r.w - y*r.z + z*r.y - w*r.x,
876                     x*r.z + y*r.w - z*r.x - w*r.y,
877                    -x*r.y + y*r.x + z*r.w - w*r.z,
878                     x*r.x + y*r.y + z*r.z + w*r.w);
879 #endif
880 }
881
882 #ifdef MATH_ENABLE_STL_SUPPORT
883 std::ostream &operator <<(std::ostream &out, const Quat &rhs)
884 {
885         out << rhs.ToString();
886         return out;
887 }
888 #endif
889
890 Quat MUST_USE_RESULT Quat::Mul(const Quat &rhs) const { return *this * rhs; }
891 Quat MUST_USE_RESULT Quat::Mul(const float3x3 &rhs) const { return *this * Quat(rhs); }
892 float3 MUST_USE_RESULT Quat::Mul(const float3 &vector) const { return this->Transform(vector); }
893 float4 MUST_USE_RESULT Quat::Mul(const float4 &vector) const { return this->Transform(vector); }
894
895 const Quat Quat::identity = Quat(0.f, 0.f, 0.f, 1.f);
896 const Quat Quat::nan = Quat(FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN);
897
898 MATH_END_NAMESPACE