float4.cpp

1 /* Copyright 2011 Jukka Jylänki
2
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7        http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License. */
14
15 /** @file float4.cpp
16         @author Jukka Jylänki
17         @brief */
18 #include "float4.h"
19 #ifdef MATH_ENABLE_STL_SUPPORT
20 #include "myassert.h"
21 #include <utility>
22 #include <iostream>
23 #endif
24
25 #include <stdlib.h>
26
27 #include "float2.h"
28 #include "float3.h"
29 #include "../Geometry/AABB.h"
30 #include "../Geometry/Sphere.h"
31 #include "../Algorithm/Random/LCG.h"
32 #include "float4x4.h"
33 #include "MathFunc.h"
34 #include "SSEMath.h"
35 #include "float4_sse.h"
36 #include "float4_neon.h"
37
38 MATH_BEGIN_NAMESPACE
39
40 using namespace std;
41
42 float4::float4(float x_, float y_, float z_, float w_)
43 #if !defined(MATH_AUTOMATIC_SSE)
44         // Best: 8.449 nsecs / 23.376 ticks, Avg: 8.837 nsecs, Worst: 9.601 nsecs
45 :x(x_), y(y_), z(z_), w(w_)
46 #endif
47 {
48 #if defined(MATH_AUTOMATIC_SSE)
49         // Best: 1.536 nsecs / 4.2 ticks, Avg: 1.609 nsecs, Worst: 1.920 nsecs
50         v = set_ps(w_, z_, y_, x_);
51 #endif
52 }
53
54 float4::float4(const float3 &xyz, float w_)
55 #if !defined(MATH_AUTOMATIC_SSE) || !defined(MATH_SSE)
56 // Best: 5.761 nsecs / 15.872 ticks, Avg: 6.237 nsecs, Worst: 7.681 nsecs
57 :x(xyz.x), y(xyz.y), z(xyz.z), w(w_)
58 #endif
59 {
60 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
61         // Best: 1.536 nsecs / 4.032 ticks, Avg: 1.540 nsecs, Worst: 1.920 nsecs
62         v = load_vec3(xyz.ptr(), w_);
63 #endif
64 }
65
66 float4::float4(float x_, float y_, const float2 &zw)
67 #if !defined(MATH_AUTOMATIC_SSE)
68 :x(x_), y(y_), z(zw.x), w(zw.y)
69 #endif
70 {
71 #if defined(MATH_AUTOMATIC_SSE)
72         v = set_ps(zw.y, zw.x, y_, x_);
73 #endif
74 }
75
76 float4::float4(float x_, const float2 &yz, float w_)
77 #if !defined(MATH_AUTOMATIC_SSE)
78 :x(x_), y(yz.x), z(yz.y), w(w_)
79 #endif
80 {
81 #if defined(MATH_AUTOMATIC_SSE)
82         v = set_ps(w_, yz.y, yz.x, x_);
83 #endif
84 }
85
86 float4::float4(float x_, const float3 &yzw)
87 #if !defined(MATH_AUTOMATIC_SSE)
88 :x(x_), y(yzw.x), z(yzw.y), w(yzw.z)
89 #endif
90 {
91 #if defined(MATH_AUTOMATIC_SSE)
92         v = set_ps(yzw.z, yzw.y, yzw.x, x_);
93 #endif
94 }
95
96 float4::float4(const float2 &xy, float z_, float w_)
97 #if !defined(MATH_AUTOMATIC_SSE)
98 :x(xy.x), y(xy.y), z(z_), w(w_)
99 #endif
100 {
101 #if defined(MATH_AUTOMATIC_SSE)
102         v = set_ps(w_, z_, xy.y, xy.x);
103 #endif
104 }
105
106 float4::float4(const float2 &xy, const float2 &zw)
107 #if !defined(MATH_AUTOMATIC_SSE)
108 :x(xy.x), y(xy.y), z(zw.x), w(zw.y)
109 #endif
110 {
111 #if defined(MATH_AUTOMATIC_SSE)
112         v = set_ps(zw.y, zw.x, xy.y, xy.x);
113 #endif
114 }
115
116 float4::float4(const float *data)
117 {
118         assume(data);
119 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
120         if (!data)
121                 return;
122 #endif
123 #if defined(MATH_AUTOMATIC_SSE)
124         v = loadu_ps(data);
125 #else
126         x = data[0];
127         y = data[1];
128         z = data[2];
129         w = data[3];
130 #endif
131 }
132
133 CONST_WIN32 float float4::At(int index) const
134 {
135         assume(index >= 0);
136         assume(index < Size);
137 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
138         if (index < 0 || index >= Size)
139                 return FLOAT_NAN;
140 #endif
141         return ptr()[index];
142 }
143
144 float &float4::At(int index)
145 {
146         assume(index >= 0);
147         assume(index < Size);
148 #ifndef MATH_ENABLE_INSECURE_OPTIMIZATIONS
149         if (index < 0 || index >= Size)
150                 return ptr()[0];
151 #endif
152         return ptr()[index];
153 }
154
155 float2 float4::xy() const
156 {
157         return float2(x, y);
158 }
159
160 float3 float4::xyz() const
161 {
162         return float3(x, y, z);
163 }
164
165 float2 float4::Swizzled(int i, int j) const
166 {
167         return float2(At(i), At(j));
168 }
169
170 float3 float4::Swizzled(int i, int j, int k) const
171 {
172         return float3(At(i), At(j), At(k));
173 }
174
175 float4 float4::Swizzled(int i, int j, int k, int l) const
176 {
177 #if defined(MATH_AVX) && defined MATH_AUTOMATIC_SSE
178         return vec4_permute(v, i, j, k, l);
179         ///\todo How to perform an efficient swizzle if AVX is not available?
180         ///      We need a dynamic runtime shuffle operation, so _mm_shuffle_ps
181         ///      cannot be used.
182 #else
183         return float4(At(i), At(j), At(k), At(l));
184 #endif
185 }
186
187 float4 float4::xxxx() const
188 {
189 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
190         return xxxx_ps(v);
191 #else
192         return float4::FromScalar(x);
193 #endif
194 }
195
196 float4 float4::yyyy() const
197 {
198 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
199         return yyyy_ps(v);
200 #else
201         return float4::FromScalar(x);
202 #endif
203 }
204
205 float4 float4::zzzz() const
206 {
207 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
208         return zzzz_ps(v);
209 #else
210         return float4::FromScalar(x);
211 #endif
212 }
213
214 float4 float4::xxxw() const
215 {
216 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
217         return shuffle1_ps(v, _MM_SHUFFLE(3, 0, 0, 0));
218 #else
219         return float4(x, x, x, w);
220 #endif
221 }
222
223 float4 float4::yyyw() const
224 {
225 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
226         return shuffle1_ps(v, _MM_SHUFFLE(3, 1, 1, 1));
227 #else
228         return float4(y, y, y, w);
229 #endif
230 }
231
232 float4 float4::zzzw() const
233 {
234 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
235         return shuffle1_ps(v, _MM_SHUFFLE(3, 2, 2, 2));
236 #else
237         return float4(z, z, z, w);
238 #endif
239 }
240
241 float4 float4::wwww() const
242 {
243 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
244         return wwww_ps(v);
245 #else
246         return float4::FromScalar(x);
247 #endif
248 }
249
250 #ifdef MATH_SIMD
251
252 /// The returned vector contains the squared length of the float3 part in the lowest channel of the vector.
253 ///\todo Delete this function.
254 simd4f float4::LengthSq3_SSE() const
255 {
256         return dot3_ps3(v, v);
257 }
258
259 /// The returned vector contains the length of the float3 part in each channel of the vector.
260 ///\todo Delete this function.
261 simd4f float4::Length3_SSE() const
262 {
263         return sqrt_ps(dot3_ps(v, v));
264 }
265
266 /// The returned vector contains the squared length of the float4 in each channel of the vector.
267 simd4f float4::LengthSq4_SSE() const
268 {
269         return dot4_ps(v, v);
270 }
271
272 /// The returned vector contains the length of the float4 in each channel of the vector.
273 simd4f float4::Length4_SSE() const
274 {
275         return sqrt_ps(dot4_ps(v, v));
276 }
277
278 void float4::Normalize3_Fast_SSE()
279 {
280         simd4f len = Length3_SSE();
281         simd4f normalized = div_ps(v, len); // Normalize.
282         v = cmov_ps(v, normalized, sseMaskXYZ); // Return the original .w component to the vector (this function is supposed to preserve original .w).
283 }
284
285 simd4f float4::Normalize4_SSE()
286 {
287         simd4f len = Length4_SSE();
288         simd4f isZero = cmplt_ps(len, simd4fEpsilon); // Was the length zero?
289         simd4f normalized = div_ps(v, len); // Normalize.
290         v = cmov_ps(normalized, float4::unitX.v, isZero); // If length == 0, output the vector (1,0,0,0).
291         return len;
292 }
293
294 void float4::Normalize4_Fast_SSE()
295 {
296         simd4f recipLen = rsqrt_ps(dot4_ps(v, v));
297         v = mul_ps(v, recipLen);
298 }
299
300 void float4::NormalizeW_SSE()
301 {
302 #ifdef MATH_SSE
303         simd4f div = wwww_ps(v);
304         v = div_ps(v, div);
305 #elif defined(MATH_NEON)
306         v = div_ps(v, vdupq_n_f32(vgetq_lane_f32(v, 3)));
307 #endif
308 }
309
310 #endif
311
312 float float4::LengthSq3() const
313 {
314 #ifdef MATH_AUTOMATIC_SSE
315         return vec3_length_sq_float(v);
316 #else
317         return x*x + y*y + z*z;
318 #endif
319 }
320
321 float float4::Length3() const
322 {
323 #ifdef MATH_AUTOMATIC_SSE
324         return vec3_length_float(v);
325 #else
326         return Sqrt(x*x + y*y + z*z);
327 #endif
328 }
329
330 float float4::LengthSq4() const
331 {
332 #ifdef MATH_AUTOMATIC_SSE
333         return vec4_length_sq_float(v);
334 #else
335         return x*x + y*y + z*z + w*w;
336 #endif
337 }
338
339 float float4::Length4() const
340 {
341 #ifdef MATH_AUTOMATIC_SSE
342         return vec4_length_float(v);
343 #else
344         return Sqrt(x*x + y*y + z*z + w*w);
345 #endif
346 }
347
348 float float4::Normalize3()
349 {
350 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
351         simd4f origLength;
352         v = vec4_safe_normalize3(v, origLength);
353         return s4f_x(origLength);
354 #else
355         assume(IsFinite());
356         float lengthSq = LengthSq3();
357         if (lengthSq > 1e-6f)
358         {
359                 float length = Sqrt(lengthSq);
360                 float invLength = 1.f / length;
361                 x *= invLength;
362                 y *= invLength;
363                 z *= invLength;
364                 return length;
365         }
366         else
367         {
368                 Set(1.f, 0.f, 0.f, w); // We will always produce a normalized vector.
369                 return 0; // But signal failure, so user knows we have generated an arbitrary normalization.
370         }
371 #endif
372 }
373
374 float4 float4::Normalized3() const
375 {
376 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
377         simd4f origLength;
378         return vec4_safe_normalize3(v, origLength);
379 #else
380         float4 copy = *this;
381         float length = copy.Normalize3();
382         assume(length > 0);
383         MARK_UNUSED(length);
384         return copy;
385 #endif
386 }
387
388 float float4::Normalize4()
389 {
390 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
391         simd4f len = Normalize4_SSE();
392         return s4f_x(len);
393 #else
394         assume(IsFinite());
395         float lengthSq = LengthSq4();
396         if (lengthSq > 1e-6f)
397         {
398                 float length = Sqrt(lengthSq);
399                 *this *= 1.f / length;
400                 return length;
401         }
402         else
403         {
404                 Set(1.f, 0.f, 0.f, 0.f); // We will always produce a normalized vector.
405                 return 0; // But signal failure, so user knows we have generated an arbitrary normalization.
406         }
407 #endif
408 }
409
410 float4 float4::Normalized4() const
411 {
412         float4 copy = *this;
413         float length = copy.Normalize4();
414         assume(length > 0);
415         MARK_UNUSED(length);
416         return copy;
417 }
418
419 void float4::NormalizeW()
420 {
421 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
422         NormalizeW_SSE();
423 #else
424         if (MATH_NS::Abs(w) > 1e-6f)
425         {
426                 float invW = 1.f / w;
427                 x *= invW;
428                 y *= invW;
429                 z *= invW;
430                 w = 1.f;
431         }
432 #endif
433 }
434
435 bool float4::IsWZeroOrOne(float epsilon) const
436 {
437         return EqualAbs(w, 0.f, epsilon) || EqualAbs(w, 1.f, epsilon);
438 }
439
440 bool float4::IsZero4(float epsilonSq) const
441 {
442         return LengthSq4() <= epsilonSq;
443 }
444
445 bool float4::IsZero3(float epsilonSq) const
446 {
447         return LengthSq3() <= epsilonSq;
448 }
449
450 bool float4::IsNormalized4(float epsilonSq) const
451 {
452         return MATH_NS::Abs(LengthSq4()-1.f) <= epsilonSq;
453 }
454
455 bool float4::IsNormalized3(float epsilonSq) const
456 {
457         return MATH_NS::Abs(LengthSq3()-1.f) <= epsilonSq;
458 }
459
460 void float4::Scale3(float scalar)
461 {
462 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
463         simd4f scale = setx_ps(scalar);
464         scale = _mm_shuffle_ps(scale, simd4fOne, _MM_SHUFFLE(0,0,0,0)); // scale = (1 1 s s)
465         scale = shuffle1_ps(scale, _MM_SHUFFLE(3,0,0,0)); // scale = (1 s s s)
466         v = _mm_mul_ps(v, scale);
467 #else
468         x *= scalar;
469         y *= scalar;
470         z *= scalar;
471 #endif
472 }
473
474 float float4::ScaleToLength3(float newLength)
475 {
476         ///\todo Add SSE-enabled version.
477         ///\todo Add ClampToLength3.
478         float length = LengthSq3();
479         if (length < 1e-6f)
480                 return 0.f;
481
482         length = Sqrt(length);
483         float scalar = newLength / length;
484         x *= scalar;
485         y *= scalar;
486         z *= scalar;
487         return length;
488 }
489
490 float4 float4::ScaledToLength3(float newLength) const
491 {
492         assume(!IsZero3());
493
494         float4 v = *this;
495         v.ScaleToLength3(newLength);
496         return v;
497 }
498
499 float float4::ScaleToLength(float newLength)
500 {
501         float length = Length();
502         float scalar = newLength / length;
503         *this *= scalar;
504         return length;
505 }
506
507 float4 float4::ScaledToLength(float newLength) const
508 {
509         float4 v = *this;
510         v.ScaleToLength(newLength);
511         return v;
512 }
513
514 bool float4::IsFinite() const
515 {
516         return MATH_NS::IsFinite(x) && MATH_NS::IsFinite(y) && MATH_NS::IsFinite(z) && MATH_NS::IsFinite(w);
517 }
518
519 bool float4::IsPerpendicular3(const float4 &other, float epsilonSq) const
520 {
521         float dot = Dot3(other);
522         return dot*dot <= epsilonSq * LengthSq() * other.LengthSq();
523 }
524
525 bool float4::IsPerpendicular(const float4 &other, float epsilonSq) const
526 {
527         float dot = Dot(other);
528         return dot*dot <= epsilonSq * LengthSq() * other.LengthSq();
529 }
530
531 bool IsNeutralCLocale();
532
533 #ifdef MATH_ENABLE_STL_SUPPORT
534 std::string float4::ToString() const
535 {
536         char str[256];
537         sprintf(str, "(%.3f, %.3f, %.3f, %.3f)", x, y, z, w);
538         return std::string(str);
539 }
540
541 std::string float4::SerializeToString() const
542 {
543         char str[256];
544         char *s = SerializeFloat(x, str); *s = ','; ++s;
545         s = SerializeFloat(y, s); *s = ','; ++s;
546         s = SerializeFloat(z, s); *s = ','; ++s;
547         s = SerializeFloat(w, s);
548         assert(s+1 - str < 256);
549         MARK_UNUSED(s);
550         return str;
551 }
552
553 std::string float4::SerializeToCodeString() const
554 {
555         return "float4(" + SerializeToString() + ")";
556 }
557 #endif
558
559 float4 float4::FromString(const char *str, const char **outEndStr)
560 {
561         assert(IsNeutralCLocale());
562         assume(str);
563         if (!str)
564                 return float4::nan;
565         MATH_SKIP_WORD(str, "float4");
566         MATH_SKIP_WORD(str, "(");
567         float4 f;
568         f.x = DeserializeFloat(str, &str);
569         f.y = DeserializeFloat(str, &str);
570         f.z = DeserializeFloat(str, &str);
571         f.w = DeserializeFloat(str, &str);
572         if (*str == ')')
573                 ++str;
574         if (*str == ',')
575                 ++str;
576         if (outEndStr)
577                 *outEndStr = str;
578         return f;
579 }
580
581 float float4::SumOfElements() const
582 {
583 #ifdef MATH_AUTOMATIC_SSE
584         return sum_xyzw_float(v);
585 #else
586         return x + y + z + w;
587 #endif
588 }
589
590 float float4::ProductOfElements() const
591 {
592 #ifdef MATH_AUTOMATIC_SSE
593         return mul_xyzw_float(v);
594 #else
595         return x * y * z * w;
596 #endif
597 }
598
599 float float4::AverageOfElements() const
600 {
601         return 0.25f * SumOfElements();
602 }
603
604 float float4::MinElement() const
605 {
606         return MATH_NS::Min(MATH_NS::Min(x, y), MATH_NS::Min(z, w));
607 }
608
609 int float4::MinElementIndex() const
610 {
611         if (x < y)
612         {
613                 if (z < w)
614                         return (x < z) ? 0 : 2;
615                 else
616                         return (x < w) ? 0 : 3;
617         }
618         else
619         {
620                 if (z < w)
621                         return (y < z) ? 1 : 2;
622                 else
623                         return (y < w) ? 1 : 3;
624         }
625 }
626
627 float float4::MaxElement() const
628 {
629         return MATH_NS::Max(MATH_NS::Max(x, y), MATH_NS::Min(z, w));
630 }
631
632 int float4::MaxElementIndex() const
633 {
634         if (x > y)
635         {
636                 if (z > w)
637                         return (x > z) ? 0 : 2;
638                 else
639                         return (x > w) ? 0 : 3;
640         }
641         else
642         {
643                 if (z > w)
644                         return (y > z) ? 1 : 2;
645                 else
646                         return (y > w) ? 1 : 3;
647         }
648 }
649
650 float4 float4::Abs() const
651 {
652 #ifdef MATH_AUTOMATIC_SSE
653         return abs_ps(v);
654 #else
655         return float4(MATH_NS::Abs(x), MATH_NS::Abs(y), MATH_NS::Abs(z), MATH_NS::Abs(w));
656 #endif
657 }
658
659 float4 float4::Neg3() const
660 {
661 #ifdef MATH_AUTOMATIC_SSE
662         return negate3_ps(v);
663 #else
664         return float4(-x, -y, -z, w);
665 #endif
666 }
667
668 float4 float4::Neg4() const
669 {
670 #ifdef MATH_AUTOMATIC_SSE
671         return negate_ps(v);
672 #else
673         return float4(-x, -y, -z, -w);
674 #endif
675 }
676
677 float4 float4::Recip3() const
678 {
679         ///\todo SSE.
680         return float4(1.f/x, 1.f/y, 1.f/z, w);
681 }
682
683 float4 float4::Recip4() const
684 {
685 #ifdef MATH_AUTOMATIC_SSE
686         return vec4_recip(v);
687 #else
688         return float4(1.f/x, 1.f/y, 1.f/z, 1.f/w);
689 #endif
690 }
691
692 float4 float4::RecipFast4() const
693 {
694 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
695         return float4(_mm_rcp_ps(v));
696 #else
697         return float4(1.f/x, 1.f/y, 1.f/z, 1.f/w);
698 #endif
699 }
700
701 float4 float4::Min(float ceil) const
702 {
703 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
704         return float4(min_ps(v, set1_ps(ceil)));
705 #else
706         return float4(MATH_NS::Min(x, ceil), MATH_NS::Min(y, ceil), MATH_NS::Min(z, ceil), MATH_NS::Min(w, ceil));
707 #endif
708 }
709
710 float4 float4::Min(const float4 &ceil) const
711 {
712 #ifdef MATH_AUTOMATIC_SSE
713         return float4(min_ps(v, ceil.v));
714 #else
715         return float4(MATH_NS::Min(x, ceil.x), MATH_NS::Min(y, ceil.y), MATH_NS::Min(z, ceil.z), MATH_NS::Min(w, ceil.w));
716 #endif
717 }
718
719 float4 float4::Max(float floor) const
720 {
721 #ifdef MATH_AUTOMATIC_SSE
722         return float4(max_ps(v, set1_ps(floor)));
723 #else
724         return float4(MATH_NS::Max(x, floor), MATH_NS::Max(y, floor), MATH_NS::Max(z, floor), MATH_NS::Max(w, floor));
725 #endif
726 }
727
728 float4 float4::Max(const float4 &floor) const
729 {
730 #ifdef MATH_AUTOMATIC_SSE
731         return float4(max_ps(v, floor.v));
732 #else
733         return float4(MATH_NS::Max(x, floor.x), MATH_NS::Max(y, floor.y), MATH_NS::Max(z, floor.z), MATH_NS::Max(w, floor.w));
734 #endif
735 }
736
737 float4 float4::Clamp(const float4 &floor, const float4 &ceil) const
738 {
739 #ifdef MATH_AUTOMATIC_SSE
740         return float4(max_ps(min_ps(v, ceil.v), floor.v));
741 #else
742         return float4(MATH_NS::Clamp(x, floor.x, ceil.x),
743                                   MATH_NS::Clamp(y, floor.y, ceil.y),
744                                   MATH_NS::Clamp(z, floor.z, ceil.z),
745                                   MATH_NS::Clamp(w, floor.w, ceil.w));
746 #endif
747 }
748
749 float4 float4::Clamp01() const
750 {
751 #ifdef MATH_AUTOMATIC_SSE
752         return float4(max_ps(min_ps(v, simd4fOne), simd4fZero));
753 #else
754         return float4(MATH_NS::Clamp(x, 0.f, 1.f),
755                                   MATH_NS::Clamp(y, 0.f, 1.f),
756                                   MATH_NS::Clamp(z, 0.f, 1.f),
757                                   MATH_NS::Clamp(w, 0.f, 1.f));
758 #endif
759 }
760
761 float4 float4::Clamp(float floor, float ceil) const
762 {
763 #ifdef MATH_AUTOMATIC_SSE
764         return float4(max_ps(min_ps(v, set1_ps(ceil)), set1_ps(floor)));
765 #else
766         return float4(MATH_NS::Clamp(x, floor, ceil),
767                                   MATH_NS::Clamp(y, floor, ceil),
768                                   MATH_NS::Clamp(z, floor, ceil),
769                                   MATH_NS::Clamp(w, floor, ceil));
770 #endif
771 }
772
773 float float4::Distance3Sq(const float4 &rhs) const
774 {
775 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
776         return vec3_length_sq_float(sub_ps(v, rhs.v));
777 #else
778         float dx = x - rhs.x;
779         float dy = y - rhs.y;
780         float dz = z - rhs.z;
781         return dx*dx + dy*dy + dz*dz;
782 #endif
783 }
784
785 float float4::Distance3(const float4 &rhs) const
786 {
787 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
788         return vec3_length_float(sub_ps(v, rhs.v));
789 #else
790         return Sqrt(Distance3Sq(rhs));
791 #endif
792 }
793
794 float float4::Distance4Sq(const float4 &rhs) const
795 {
796 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
797         return vec4_length_sq_float(sub_ps(v, rhs.v));
798 #else
799         float dx = x - rhs.x;
800         float dy = y - rhs.y;
801         float dz = z - rhs.z;
802         float dw = w - rhs.w;
803         return dx*dx + dy*dy + dz*dz + dw*dw;
804 #endif
805 }
806
807 float float4::Distance4(const float4 &rhs) const
808 {
809 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
810         return vec4_length_float(sub_ps(v, rhs.v));
811 #else
812         return Sqrt(Distance4Sq(rhs));
813 #endif
814 }
815
816 float float4::Dot3(const float3 &rhs) const
817 {
818 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
819         return dot3_float(v, float4(rhs, 0.f));
820 #else
821         return x * rhs.x + y * rhs.y + z * rhs.z;
822 #endif
823 }
824
825 float float4::Dot3(const float4 &rhs) const
826 {
827 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
828         return dot3_float(v, rhs.v);
829 #else
830         return x * rhs.x + y * rhs.y + z * rhs.z;
831 #endif
832 }
833
834 float float4::Dot4(const float4 &rhs) const
835 {
836 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
837         return dot4_float(v, rhs.v);
838 #else
839         return x * rhs.x + y * rhs.y + z * rhs.z + w * rhs.w;
840 #endif
841 }
842
843 /** dst = A x B - Apply the diagonal rule to derive the standard cross product formula:
844 \code
845             |a cross b| = |a||b|sin(alpha)
846
847             i            j            k            i            j            k        units (correspond to x,y,z)
848             a.x          a.y          a.z          a.x          a.y          a.z      vector a (this)
849             b.x          b.y          b.z          b.x          b.y          b.z      vector b
850         -a.z*b.y*i   -a.x*b.z*j   -a.y*b.x*k    a.y*b.z*i    a.z*b.x*j    a.x*b.y*k   result
851
852         Add up the results:
853             x = a.y*b.z - a.z*b.y
854             y = a.z*b.x - a.x*b.z
855             z = a.x*b.y - a.y*b.x
856 \endcode
857
858 Cross product is anti-commutative, i.e. a x b == -b x a.
859 It distributes over addition, meaning that a x (b + c) == a x b + a x c,
860 and combines with scalar multiplication: (sa) x b == a x (sb).
861 i x j == -(j x i) == k,
862 (j x k) == -(k x j) == i,
863 (k x i) == -(i x k) == j. */
864 float4 float4::Cross3(const float3 &rhs) const
865 {
866 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
867         return float4(cross_ps(v, load_vec3(rhs.ptr(), 0.f)));
868 #else
869         float4 dst;
870         dst.x = y * rhs.z - z * rhs.y;
871         dst.y = z * rhs.x - x * rhs.z;
872         dst.z = x * rhs.y - y * rhs.x;
873         dst.w = 0.f;
874         return dst;
875 #endif
876 }
877
878 float4 float4::Cross3(const float4 &rhs) const
879 {
880 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
881         assert((((uintptr_t)&rhs) & 15) == 0); // For SSE ops we must be 16-byte aligned.
882         assert((((uintptr_t)this) & 15) == 0);
883         return float4(cross_ps(v, rhs.v));
884 #else
885         return Cross3(rhs.xyz());
886 #endif
887 }
888
889 float4x4 float4::OuterProduct(const float4 &rhs) const
890 {
891         const float4 &u = *this;
892         const float4 &v = rhs;
893         return float4x4(u[0]*v[0], u[0]*v[1], u[0]*v[2], u[0]*v[3],
894                                         u[1]*v[0], u[1]*v[1], u[1]*v[2], u[1]*v[3],
895                                         u[2]*v[0], u[2]*v[1], u[2]*v[2], u[2]*v[3],
896                                         u[3]*v[0], u[3]*v[1], u[3]*v[2], u[3]*v[3]);
897 }
898
899 float4 float4::Perpendicular3(const float3 &hint, const float3 &hint2) const
900 {
901         assume(!this->IsZero3());
902         assume(EqualAbs(w, 0));
903         assume(hint.IsNormalized());
904         assume(hint2.IsNormalized());
905         float3 v = this->Cross3(hint).xyz();
906         float len = v.Normalize();
907         if (len == 0)
908                 return float4(hint2, 0);
909         else
910                 return float4(v, 0);
911 }
912
913 float4 float4::Perpendicular(const float4 &hint, const float4 &hint2) const
914 {
915         assume(!this->IsZero3());
916         assume(EqualAbs(w, 0));
917         assume(hint.IsNormalized());
918         assume(hint2.IsNormalized());
919         float4 v = this->Cross(hint);
920         float len = v.Normalize();
921         if (len == 0)
922                 return hint2;
923         else
924                 return v;
925 }
926
927 float4 float4::AnotherPerpendicular3(const float3 &hint, const float3 &hint2) const
928 {
929         float4 firstPerpendicular = Perpendicular3(hint, hint2);
930         float4 v = this->Cross3(firstPerpendicular);
931         return v.Normalized3();
932 }
933
934 float4 float4::AnotherPerpendicular(const float4 &hint, const float4 &hint2) const
935 {
936         float4 firstPerpendicular = Perpendicular(hint, hint2);
937         float4 v = this->Cross(firstPerpendicular);
938         return v.Normalized();
939 }
940
941 void float4::PerpendicularBasis(float4 &outB, float4 &outC) const
942 {
943 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
944         // Benchmark 'float4_PerpendicularBasis': float4::PerpendicularBasis
945         //   Best: 17.468 nsecs / 29.418 ticks, Avg: 17.703 nsecs, Worst: 19.275 nsecs
946         basis_ps(this->v, &outB.v, &outC.v);
947 #else
948         // Benchmark 'float4_PerpendicularBasis': float4::PerpendicularBasis
949         //   Best: 33.731 nsecs / 57.715 ticks, Avg: 35.080 nsecs, Worst: 39.152 nsecs
950         float4 a = this->Abs();
951         // Choose from (1,0,0), (0,1,0), and (0,0,1) the one that's most perpendicular to this vector.
952         float4 q;
953         if (a.x <= a.y)
954         {
955                 if (a.x <= a.z) q = float4(1,0,0,0);
956                 else q = float4(0,0,1,0);
957         }
958         else if (a.y <= a.z) q = float4(0,1,0,0);
959         else q = float4(0,0,1,0);
960
961         outB = this->Cross(q).Normalized();
962         outC = this->Cross(outB).Normalized();
963 #endif
964 }
965
966 float4 float4::RandomPerpendicular(LCG &rng) const
967 {
968         return Perpendicular(RandomDir(rng));
969 }
970
971 float4 float4::Reflect3(const float3 &normal) const
972 {
973         assume2(normal.IsNormalized(), normal.SerializeToCodeString(), normal.Length());
974         assume(EqualAbs(w, 0));
975         return 2.f * this->ProjectToNorm3(normal) - *this;
976 }
977
978 float4 float4::Reflect(const float4 &normal) const
979 {
980         assume2(normal.IsNormalized(), normal.SerializeToCodeString(), normal.Length());
981         assume(EqualAbs(w, 0));
982         return 2.f * this->ProjectToNorm(normal) - *this;
983 }
984
985 /// Implementation from http://www.flipcode.com/archives/reflection_transmission.pdf .
986 float4 float4::Refract(const float4 &normal, float negativeSideRefractionIndex, float positiveSideRefractionIndex) const
987 {
988         // This code is duplicated in float2::Refract.
989         float n = negativeSideRefractionIndex / positiveSideRefractionIndex;
990         float cosI = this->Dot(normal);
991         float sinT2 = n*n*(1.f - cosI*cosI);
992         if (sinT2 > 1.f) // Total internal reflection occurs?
993                 return (-*this).Reflect(normal);
994         return n * *this - (n + Sqrt(1.f - sinT2)) * normal;
995 }
996
997 float float4::AngleBetween3(const float4 &other) const
998 {
999         float cosa = Dot3(other) / Sqrt(LengthSq3() * other.LengthSq3());
1000         if (cosa >= 1.f)
1001                 return 0.f;
1002         else if (cosa <= -1.f)
1003                 return pi;
1004         else
1005                 return acos(cosa);
1006 }
1007
1008 float float4::AngleBetweenNorm3(const float4 &other) const
1009 {
1010         assume(this->IsNormalized3());
1011         assume(other.IsNormalized3());
1012         return acos(Dot3(other));
1013 }
1014
1015 float float4::AngleBetween4(const float4 &other) const
1016 {
1017         float cosa = Dot4(other) / Sqrt(LengthSq4() * other.LengthSq4());
1018         if (cosa >= 1.f)
1019                 return 0.f;
1020         else if (cosa <= -1.f)
1021                 return pi;
1022         else
1023                 return acos(cosa);
1024 }
1025
1026 float float4::AngleBetweenNorm4(const float4 &other) const
1027 {
1028         assume(this->IsNormalized4());
1029         assume(other.IsNormalized4());
1030         return acos(Dot4(other));
1031 }
1032
1033 float4 float4::ProjectTo3(const float3 &target) const
1034 {
1035         assume(!target.IsZero());
1036         assume(this->IsWZeroOrOne());
1037         return float4(target * MATH_NS::Dot(xyz(), target) / target.LengthSq(), w);
1038 }
1039
1040 float4 float4::ProjectTo(const float4 &target) const
1041 {
1042         assume(!target.IsZero());
1043         assume(this->IsWZeroOrOne());
1044         return target * (this->Dot(target) / target.LengthSq());
1045 }
1046
1047 float4 float4::ProjectToNorm3(const float3 &target) const
1048 {
1049         assume(target.IsNormalized());
1050         assume(this->IsWZeroOrOne());
1051         return float4(target * MATH_NS::Dot(xyz(), target), w);
1052 }
1053
1054 float4 float4::ProjectToNorm(const float4 &target) const
1055 {
1056         assume(target.IsNormalized());
1057         assume(this->IsWZeroOrOne());
1058         return target * this->Dot(target);
1059 }
1060
1061 bool MUST_USE_RESULT float4::AreCollinear(const float4 &p1, const float4 &p2, const float4 &p3, float epsilon)
1062 {
1063         return (p2 - p1).Cross(p3 - p1).LengthSq() <= epsilon;
1064 }
1065
1066 float4 float4::Lerp(const float4 &b, float t) const
1067 {
1068         assume(EqualAbs(this->w, b.w));
1069         assume(0.f <= t && t <= 1.f);
1070 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1071         return vec4_lerp(v, b.v, t);
1072 #else
1073         return (1.f - t) * *this + t * b;
1074 #endif
1075 }
1076
1077 float4 float4::Lerp(const float4 &a, const float4 &b, float t)
1078 {
1079         return a.Lerp(b, t);
1080 }
1081
1082 bool MUST_USE_RESULT float4::AreOrthogonal(const float4 &a, const float4 &b, float epsilon)
1083 {
1084         return a.IsPerpendicular(b, epsilon);
1085 }
1086
1087 bool MUST_USE_RESULT float4::AreOrthogonal(const float4 &a, const float4 &b, const float4 &c, float epsilon)
1088 {
1089         return a.IsPerpendicular(b, epsilon) &&
1090                a.IsPerpendicular(c, epsilon) &&
1091                b.IsPerpendicular(c, epsilon);
1092 }
1093
1094 void float4::Orthonormalize(float4 &a, float4 &b)
1095 {
1096         assume(!a.IsZero());
1097         assume(!b.IsZero());
1098         a.Normalize();
1099         b -= b.ProjectToNorm(a);
1100         b.Normalize();
1101 }
1102
1103 void float4::Orthonormalize(float4 &a, float4 &b, float4 &c)
1104 {
1105         assume(!a.IsZero());
1106         a.Normalize();
1107         b -= b.ProjectToNorm(a);
1108         assume(!b.IsZero());
1109         b.Normalize();
1110         c -= c.ProjectToNorm(a);
1111         c -= c.ProjectToNorm(b);
1112         assume(!c.IsZero());
1113         c.Normalize();
1114 }
1115
1116 bool MUST_USE_RESULT float4::AreOrthonormal(const float4 &a, const float4 &b, float epsilon)
1117 {
1118         return a.IsPerpendicular(b, epsilon) && a.IsNormalized(epsilon*epsilon) && b.IsNormalized(epsilon*epsilon);
1119 }
1120
1121 bool MUST_USE_RESULT float4::AreOrthonormal(const float4 &a, const float4 &b, const float4 &c, float epsilon)
1122 {
1123         return a.IsPerpendicular(b, epsilon) &&
1124                 a.IsPerpendicular(c, epsilon) &&
1125                 b.IsPerpendicular(c, epsilon) &&
1126                 a.IsNormalized(epsilon*epsilon) &&
1127                 b.IsNormalized(epsilon*epsilon) &&
1128                 c.IsNormalized(epsilon*epsilon);
1129 }
1130
1131 float4 float4::FromScalar(float scalar)
1132 {
1133 #ifdef MATH_AUTOMATIC_SSE
1134         return set1_ps(scalar);
1135 #else
1136         return float4(scalar, scalar, scalar, scalar);
1137 #endif
1138 }
1139
1140 float4 float4::FromScalar(float scalar, float w_)
1141 {
1142 #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
1143         simd4f s = set1_ps(scalar);
1144         simd4f highPart = _mm_unpacklo_ps(s, _mm_set_ss(w_)); // [_ _ w s]
1145         return _mm_movelh_ps(s, highPart); // [w s s s]
1146 #else
1147         return float4(scalar, scalar, scalar, w_);
1148 #endif
1149 }
1150
1151 void float4::SetFromScalar(float scalar)
1152 {
1153 #ifdef MATH_AUTOMATIC_SSE
1154         v = set1_ps(scalar);
1155 #else
1156         x = scalar;
1157         y = scalar;
1158         z = scalar;
1159         w = scalar;
1160 #endif
1161 }
1162
1163 void float4::Set(float x_, float y_, float z_, float w_)
1164 {
1165 #ifdef MATH_AUTOMATIC_SSE
1166         v = set_ps(w_, z_, y_, x_);
1167 #else
1168         x = x_;
1169         y = y_;
1170         z = z_;
1171         w = w_;
1172 #endif
1173 }
1174
1175 void float4::SetFromScalar(float scalar, float w_)
1176 {
1177 #ifdef MATH_AUTOMATIC_SSE
1178 #ifdef MATH_SSE
1179         simd4f s = set1_ps(scalar);
1180         simd4f highPart = _mm_unpacklo_ps(s, _mm_set_ss(w_)); // [_ _ w s]
1181         v = _mm_movelh_ps(s, highPart); // [w s s s]
1182 #else
1183         v = set_ps(w_, scalar, scalar, scalar);
1184 #endif
1185 #else
1186         x = scalar;
1187         y = scalar;
1188         z = scalar;
1189         w = w_;
1190 #endif
1191 }
1192
1193 void float4::SetFromSphericalCoordinates(float azimuth, float inclination, float radius)
1194 {
1195         float cx = Cos(inclination);
1196         float sin, cos;
1197         SinCos(azimuth, sin, cos);
1198         x = cx * sin * radius;
1199         y = -Sin(inclination) * radius;
1200         z = cx * cos * radius;
1201         w = 0.f;
1202 }
1203
1204 float4 MUST_USE_RESULT float4::FromSphericalCoordinates(float azimuth, float inclination, float radius)
1205 {
1206         float4 v;
1207         v.SetFromSphericalCoordinates(azimuth, inclination, radius);
1208         return v;
1209 }
1210
1211 void float4::SetFromSphericalCoordinates(float azimuth, float inclination)
1212 {
1213         float4 v, s, c;
1214         v.x = inclination;
1215         v.y = azimuth;
1216         SinCos2(v, s, c);
1217         x = c.x * s.y;
1218         y = -s.x;
1219         z = c.x * c.y;
1220         w = 0.f;
1221 }
1222
1223 float4 MUST_USE_RESULT float4::FromSphericalCoordinates(float azimuth, float inclination)
1224 {
1225         float4 v;
1226         v.SetFromSphericalCoordinates(azimuth, inclination);
1227         return v;
1228 }
1229
1230 float3 float4::ToSphericalCoordinates() const
1231 {
1232         // R_y * R_x * (0,0,length) = (cosx*siny, -sinx, cosx*cosy).
1233         float4 v = *this;
1234         float len = v.Normalize();
1235         if (len <= 1e-5f)
1236                 return float3::zero;
1237         float azimuth = atan2(v.x, v.z);
1238         float inclination = asin(-v.y);
1239         return float3(azimuth, inclination, len);
1240 }
1241
1242 float2 float4::ToSphericalCoordinatesNormalized() const
1243 {
1244         assume(IsNormalized());
1245         float azimuth = atan2(x, z);
1246         float inclination = asin(-y);
1247         return float2(azimuth, inclination);
1248 }
1249
1250 bool float4::Equals(const float4 &other, float epsilon) const
1251 {
1252         return MATH_NS::Abs(x - other.x) < epsilon &&
1253                MATH_NS::Abs(y - other.y) < epsilon &&
1254                MATH_NS::Abs(z - other.z) < epsilon &&
1255                MATH_NS::Abs(w - other.w) < epsilon;
1256 }
1257
1258 bool float4::Equals(float x_, float y_, float z_, float w_, float epsilon) const
1259 {
1260         return MATH_NS::Abs(x - x_) < epsilon &&
1261                MATH_NS::Abs(y - y_) < epsilon &&
1262                MATH_NS::Abs(z - z_) < epsilon &&
1263                MATH_NS::Abs(w - w_) < epsilon;
1264 }
1265
1266 bool float4::BitEquals(const float4 &other) const
1267 {
1268         return ReinterpretAsU32(x) == ReinterpretAsU32(other.x) &&
1269                 ReinterpretAsU32(y) == ReinterpretAsU32(other.y) &&
1270                 ReinterpretAsU32(z) == ReinterpretAsU32(other.z) &&
1271                 ReinterpretAsU32(w) == ReinterpretAsU32(other.w);
1272 }
1273
1274 float4 MUST_USE_RESULT float4::RandomDir(LCG &lcg, float length)
1275 {
1276         return DIR_TO_FLOAT4(Sphere(POINT_VEC_SCALAR(0.f), length).RandomPointOnSurface(lcg) - vec(POINT_VEC_SCALAR(0.f)));
1277 }
1278
1279 float4 MUST_USE_RESULT float4::RandomSphere(LCG &lcg, const float4 &center, float radius)
1280 {
1281         return POINT_TO_FLOAT4(Sphere(FLOAT4_TO_POINT(center), radius).RandomPointInside(lcg));
1282 }
1283
1284 float4 MUST_USE_RESULT float4::RandomBox(LCG &lcg, float xmin, float xmax, float ymin, float ymax, float zmin, float zmax)
1285 {
1286         return RandomBox(lcg, float4(xmin, ymin, zmin, 1.f), float4(xmax, ymax, zmax, 1.f));
1287 }
1288
1289 float4 MUST_USE_RESULT float4::RandomBox(LCG &lcg, float minElem, float maxElem)
1290 {
1291         return RandomBox(lcg, float4(minElem, minElem, minElem, 1.f), float4(maxElem, maxElem, maxElem, 1.f));
1292 }
1293
1294 float4 MUST_USE_RESULT float4::RandomBox(LCG &lcg, const float4 &minValues, const float4 &maxValues)
1295 {
1296         return POINT_TO_FLOAT4(AABB(FLOAT4_TO_POINT(minValues), FLOAT4_TO_POINT(maxValues)).RandomPointInside(lcg));
1297 }
1298
1299 float4 float4::RandomGeneral(LCG &lcg, float minElem, float maxElem)
1300 {
1301         return float4(lcg.Float(minElem, maxElem), lcg.Float(minElem, maxElem), lcg.Float(minElem, maxElem), lcg.Float(minElem, maxElem));
1302 }
1303
1304 float4 float4::operator +(const float4 &rhs) const
1305 {
1306 #ifdef MATH_AUTOMATIC_SSE
1307         return add_ps(v, rhs.v);
1308 #else
1309         return float4(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
1310 #endif
1311 }
1312
1313 float4 float4::operator -(const float4 &rhs) const
1314 {
1315 #ifdef MATH_AUTOMATIC_SSE
1316         return sub_ps(v, rhs.v);
1317 #else
1318         return float4(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
1319 #endif
1320 }
1321
1322 float4 float4::operator -() const
1323 {
1324 #ifdef MATH_AUTOMATIC_SSE
1325         return negate_ps(v);
1326 #else
1327         return float4(-x, -y, -z, -w);
1328 #endif
1329 }
1330
1331 float4 float4::operator *(float scalar) const
1332 {
1333 #ifdef MATH_AUTOMATIC_SSE
1334         return vec4_mul_float(v, scalar);
1335 #else
1336         return float4(x * scalar, y * scalar, z * scalar, w * scalar);
1337 #endif
1338 }
1339
1340 float4 operator *(float scalar, const float4 &rhs)
1341 {
1342 #ifdef MATH_AUTOMATIC_SSE
1343         return vec4_mul_float(rhs.v, scalar);
1344 #else
1345         return float4(scalar * rhs.x, scalar * rhs.y, scalar * rhs.z, scalar * rhs.w);
1346 #endif
1347 }
1348
1349 float4 float4::operator /(float scalar) const
1350 {
1351 #ifdef MATH_AUTOMATIC_SSE
1352         return vec4_div_float(v, scalar);
1353 #else
1354         float invScalar = 1.f / scalar;
1355         return float4(x * invScalar, y * invScalar, z * invScalar, w * invScalar);
1356 #endif
1357 }
1358
1359 float4 &float4::operator +=(const float4 &rhs)
1360 {
1361 #ifdef MATH_AUTOMATIC_SSE
1362         v = add_ps(v, rhs.v);
1363 #else
1364         x += rhs.x;
1365         y += rhs.y;
1366         z += rhs.z;
1367         w += rhs.w;
1368 #endif
1369
1370         return *this;
1371 }
1372
1373 float4 &float4::operator -=(const float4 &rhs)
1374 {
1375 #ifdef MATH_AUTOMATIC_SSE
1376         v = sub_ps(v, rhs.v);
1377 #else
1378         x -= rhs.x;
1379         y -= rhs.y;
1380         z -= rhs.z;
1381         w -= rhs.w;
1382 #endif
1383
1384         return *this;
1385 }
1386
1387 float4 &float4::operator *=(float scalar)
1388 {
1389 #ifdef MATH_AUTOMATIC_SSE
1390         v = vec4_mul_float(v, scalar);
1391 #else
1392         x *= scalar;
1393         y *= scalar;
1394         z *= scalar;
1395         w *= scalar;
1396 #endif
1397
1398         return *this;
1399 }
1400
1401 float4 &float4::operator /=(float scalar)
1402 {
1403 #ifdef MATH_AUTOMATIC_SSE
1404         v = vec4_div_float(v, scalar);
1405 #else
1406         float invScalar = 1.f / scalar;
1407         x *= invScalar;
1408         y *= invScalar;
1409         z *= invScalar;
1410         w *= invScalar;
1411 #endif
1412
1413         return *this;
1414 }
1415
1416 float4 float4::Add(float s) const
1417 {
1418 #ifdef MATH_AUTOMATIC_SSE
1419         return vec4_add_float(v, s);
1420 #else
1421         return float4(x + s, y + s, z + s, w + s);
1422 #endif
1423 }
1424
1425 float4 float4::Sub(float s) const
1426 {
1427 #ifdef MATH_AUTOMATIC_SSE
1428         return vec4_sub_float(v, s);
1429 #else
1430         return float4(x - s, y - s, z - s, w - s);
1431 #endif
1432 }
1433
1434 float4 float4::SubLeft(float s) const
1435 {
1436 #ifdef MATH_AUTOMATIC_SSE
1437         return float_sub_vec4(s, v);
1438 #else
1439         return float4(s - x, s - y, s - z, s - w);
1440 #endif
1441 }
1442
1443 float4 float4::DivLeft(float s) const
1444 {
1445 #ifdef MATH_AUTOMATIC_SSE
1446         return float_div_vec4(s, v);
1447 #else
1448         return float4(s / x, s / y, s / z, s / w);
1449 #endif
1450 }
1451
1452 float4 float4::Mul(const float4 &rhs) const
1453 {
1454 #ifdef MATH_AUTOMATIC_SSE
1455         return mul_ps(v, rhs.v);
1456 #else
1457         return float4(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
1458 #endif
1459 }
1460
1461 float4 float4::Div(const float4 &rhs) const
1462 {
1463 #ifdef MATH_AUTOMATIC_SSE
1464         return div_ps(v, rhs.v);
1465 #else
1466         return float4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
1467 #endif
1468 }
1469
#if defined(MATH_ENABLE_STL_SUPPORT)
/// Stream insertion: writes rhs.ToString() to 'out' and returns the stream.
std::ostream &operator <<(std::ostream &out, const float4 &rhs)
{
        const std::string text = rhs.ToString();
        return out << text;
}
#endif
1478
1479 const float4 float4::zero = float4(0, 0, 0, 0);
1480 const float4 float4::one = float4(1, 1, 1, 1);
1481 const float4 float4::unitX = float4(1, 0, 0, 0);
1482 const float4 float4::unitY = float4(0, 1, 0, 0);
1483 const float4 float4::unitZ = float4(0, 0, 1, 0);
1484 const float4 float4::unitW = float4(0, 0, 0, 1);
1485 const float4 float4::nan = float4(FLOAT_NAN, FLOAT_NAN, FLOAT_NAN, FLOAT_NAN);
1486 const float4 float4::inf = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF);
1487
1488 MATH_END_NAMESPACE