MathFunc.h

1 /* Copyright Jukka Jyl�nki
2
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7        http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License. */
14
15 /** @file MathFunc.h
16         @author Jukka Jyl�nki
17         @brief Common mathematical functions. */
18 #pragma once
19
20 #include "myassert.h"
21 #include <math.h>
22 #include <cmath>
23 #include <float.h>
24 #include <string.h>
25
26 #include "MathTypes.h"
27 #include "MathConstants.h"
28 #include "float3.h"
29 #include "Reinterpret.h"
30 #include "SSEMath.h"
31
32 #ifdef MATH_NEON
33 #include <arm_neon.h>
34 #endif
35
36 #include "assume.h"
37
38 MATH_BEGIN_NAMESPACE
39
40 /// Computes the dot product of two 2D vectors, the elements are accessed using array notation.
41 /// @param v1 A vector of type float2, or a C array of two elements.
42 /// @param v2 A vector of type float2, or a C array of two elements.
43 /// @see DOT3(), DOT4().
44 #define DOT2(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1])
45
46 /// Computes the dot product of two 3D vectors, the elements are accessed using array notation.
47 /// @param v1 A vector of type float3, or a C array of three elements.
48 /// @param v2 A vector of type float3, or a C array of three elements.
49 /// @see DOT2(), DOT4(), ABSDOT3(), DOT3_xyz(), DOT3STRIDED().
50 #define DOT3(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2])
51
52 /// Computes the dot product of two 3D vectors, but takes the absolute value of each element before summation.
53 /// @param v1 A vector of type float3, or a C array of three elements.
54 /// @param v2 A vector of type float3, or a C array of three elements.
55 /// @see DOT3(), DOT3_xyz(), DOT3STRIDED().
56 #define ABSDOT3(v1, v2) (Abs((v1)[0] * (v2)[0]) + Abs((v1)[1] * (v2)[1]) + Abs((v1)[2] * (v2)[2]))
57
58 /// Computes the dot product of a float3 and another vector given by three floats.
59 /// @param v1 A vector of type float3, or a C array of three elements.
60 /// @param x The x component of a second vector.
61 /// @param y The y component of a second vector.
62 /// @param z The z component of a second vector.
63 /// @see DOT3(), ABSDOT3(), DOT3STRIDED().
64 #define DOT3_xyz(v1, x, y, z) ((v1)[0] * (x) + (v1)[1] * (y) + (v1)[2] * (z))
65
66 /// Computes the dot product of two 3D vectors, but with the elements of the second vector being scattered noncontiguous in memory.
67 /// @param v1 The first vector in the dot product. This can either be a C array or a float3.
68 /// @param v2 The second vector in the dot product. As opposed to the DOT3() macro, which accesses the elements of this vector
69 ///      as v2[0], v2[1], v2[2], this function accesses the elements as v2[0], v2[stride], v2[2*stride].
70 /// @param stride The distance between between the subsequent vector elements in the array v2.
71 /// @see DOT3(), ABSDOT3(), DOT3_xyz(), DOT4STRIDED().
72 #define DOT3STRIDED(v1, v2, stride) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[stride] + (v1)[2] * (v2)[2*stride])
73
74 /// Computes the dot product of two 4D vectors, the elements are accessed using array notation.
75 /// @param v1 A vector of type float4, or a C array of four elements.
76 /// @param v2 A vector of type float4, or a C array of four elements.
77 /// @see DOT2(), DOT3(), DOT4STRIDED(), DOT4POS(), DOT4POS_xyz(), DOT4DIR(), DOT4DIR_xyz().
78 #define DOT4(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] + (v1)[3] * (v2)[3])
79
80 /// Computes the dot product of two 4D vectors, but with the elements of the second vector being scattered noncontiguous in memory.
81 /// @param v1 The first vector in the dot product. This can either be a C array or a float4.
82 /// @param v2 The second vector in the dot product. As opposed to the DOT4() macro, which accesses the elements of this vector
83 ///      as v2[0], v2[1], v2[2], v2[3], this function accesses the elements as v2[0], v2[stride], v2[2*stride], v2[3*stride].
84 /// @param stride The distance between between the subsequent vector elements in the array v2.
85 /// @see DOT4(), DOT4POS(), DOT4POS_xyz(), DOT4DIR(), DOT4DIR_xyz().
86 #define DOT4STRIDED(v1, v2, stride) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[stride] + (v1)[2] * (v2)[2*stride] + (v1)[3] * (v2)[3*stride])
87
88 /// Computes the dot product of a 4D vector and a 3D position vector (w == 1).
89 /// @param vec4D The 4D vector in the dot product, or a C array of four elements.
90 /// @param vecPos The 3D vector in the dot product. This vector is expanded to a 4D vector by setting w == 1.
91 /// @see DOT4(), DOT4STRIDED(), DOT4POS_xyz(), DOT4DIR(), DOT4DIR_xyz().
92 #define DOT4POS(vec4D, vecPos) ((vec4D)[0] * (vecPos)[0] + (vec4D)[1] * (vecPos)[1] + (vec4D)[2] * (vecPos)[2] + (vec4D)[3])
93
94 /// Computes the dot product of a 4D vector and a position vector (x,y,z,1).
95 /// @param vec4D The 4D vector in the dot product, or a C array of four elements.
96 /// @param x The x component of the second vector.
97 /// @param y The y component of the second vector.
98 /// @param z The z component of the second vector.
99 /// @see DOT4(), DOT4STRIDED(), DOT4POS(), DOT4DIR(), DOT4DIR_xyz().
100 #define DOT4POS_xyz(vec4D, x, y, z) ((vec4D)[0] * (x) + (vec4D)[1] * (y) + (vec4D)[2] * (z) + (vec4D)[3])
101
102 /// Computes the dot product of a 4D vector and a direction vector (x,y,z,0).
103 /// @note This function is only provided for convenience, since this is identical to DOT3.
104 /// @see DOT3(), DOT4(), DOT4STRIDED(), DOT4POS(), DOT4POS_xyz().
105 #define DOT4DIR(vec4D, vecDir) DOT3(vec4D, vecDir)
106
107 /// Computes the dot product of a 4D vector and a direction vector (x,y,z,0).
108 /// @note This function is only provided for convenience, since this is identical to DOT3_xyz.
109 /// @see DOT3_xyz(), DOT4(), DOT4STRIDED(), DOT4POS(), DOT4POS_xyz(), DOT4DIR().
110 #define DOT4DIR_xyz(vec4D, x, y, z) DOT3_xyz(vec4D, x, y, z)
111
112 /// Converts the given amount of degrees into radians.
113 /// 180 degrees equals pi, 360 degrees is a full circle, and equals 2pi.
114 inline float3 DegToRad(const float3 &degrees) { return degrees * (pi / 180.f); }
115 inline float DegToRad(float degrees) { return degrees * (pi / 180.f); }
116
117 /// Converts the given amount of radians into degrees.
118 inline float3 RadToDeg(const float3 &radians) { return radians * (180.f / pi); }
119 inline float RadToDeg(float radians) { return radians * (180.f / pi); }
120
121 /// Computes the function sin(x).
122 /** @see Cos(), Tan(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
123 float Sin(float angleRadians);
124 /// Computes the function cos(x).
125 /** @see Sin(), Tan(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
126 float Cos(float angleRadians);
127 /// Computes the function tan(x).
128 /** @see Sin(), Cos(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
129 float Tan(float angleRadians);
130 /// Simultaneously computes both sin(x) and cos(x), which yields a small performance increase over to
131 /// computing them separately.
132 /** @see Sin(), Cos(), Tan(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
133 void SinCos(float angleRadians, float &outSin, float &outCos);
134 /// Computes sin and cos of the .x and .y components of angleRadians, stored to the corresponding components of outSin and outCos.
135 /// The .y and .w components of the outputs are undefined.
136 void SinCos2(const float4 &angleRadians, float4 &outSin, float4 &outCos);
137 /// Computes sin and cos of the .x, .y and .z components of angleRadians, stored to the corresponding components of outSin and outCos.
138 /// The .w components of the outputs are undefined.
139 void SinCos3(const float4 &angleRadians, float4 &outSin, float4 &outCos);
140 /// Computes sin and cos of the four components of angleRadians, stored to the corresponding components of outSin and outCos.
141 void SinCos4(const float4 &angleRadians, float4 &outSin, float4 &outCos);
142 /// Computes the function arcsin(x), in radians.
143 /** @see Sin(), Cos(), Tan(), SinCos(), Acos(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
144 float Asin(float x);
145 /// Computes the function arccos(x), in radians.
146 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Atan(), Atan2(), Sinh(), Cosh(), Tanh(). */
147 float Acos(float x);
148 /// Computes the function arctan(x), in radians.
149 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Acos(), Atan2(), Sinh(), Cosh(), Tanh(). */
150 float Atan(float x);
151 /// Computes the signed (principal value) arc-tangent of y/x, in radians.
152 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Acos(), Atan(), Sinh(), Cosh(), Tanh(). */
153 float Atan2(float y, float x);
154 /// Computes the hyperbolic sine of x.
155 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Cosh(), Tanh(). */
156 float Sinh(float x);
157 /// Computes the hyperbolic cosine of x.
158 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Tanh(). */
159 float Cosh(float x);
160 /// Computes the hyperbolic tangent of x.
161 /** @see Sin(), Cos(), Tan(), SinCos(), Asin(), Acos(), Atan(), Atan2(), Sinh(), Cosh(). */
162 float Tanh(float x);
163
164 /// Returns true if the given number is a power of 2.
165 /** @note IsPow2(0) == true.
166         @see RoundUpPow2(), RoundDownPow2(). */
167 bool IsPow2(u32 number);
168 bool IsPow2(u64 number);
169 FORCE_INLINE bool IsPow2(int /*s32*/ number) { assert(number >= 0); return IsPow2((u32)number); }
170 FORCE_INLINE bool IsPow2(s64 number) { assert(number >= 0); return IsPow2((u64)number); }
171 /// Returns the smallest power-of-2 number (1,2,4,8,16,32,...) greater or equal than the given number.
172 /** @note RoundUpPow2(0) == 0. Also, note that RoundUpPow2(x) == 0 if x >= 0x80000001 for the u32 version, or if x >= 0x8000000000000001 for the u64 version.
173         @see IsPow2(), RoundDownPow2(). */
174 u32 RoundUpPow2(u32 number);
175 u64 RoundUpPow2(u64 number);
176 FORCE_INLINE int /*s32*/ RoundUpPow2(int /*s32*/ number) { assert(number >= 0); return (int /*s32*/)RoundUpPow2((u32)number); }
177 FORCE_INLINE s64 RoundUpPow2(s64 number) { assert(number >= 0); return (s64)RoundUpPow2((u64)number); }
178 /// Returns the largest power-of-2 number (1,2,4,8,16,32,...) smaller or equal than the given number.
179 /** @note RoundDownPow2(0) == 0.
180         @see IsPow2(), RoundUpPow2(). */
181 u32 RoundDownPow2(u32 number);
182 u64 RoundDownPow2(u64 number);
183 FORCE_INLINE int /*s32*/ RoundDownPow2(int /*s32*/ number) { assert(number >= 0); return (int /*s32*/)RoundDownPow2((u32)number); }
184 FORCE_INLINE s64 RoundDownPow2(s64 number) { assert(number >= 0); return (s64)RoundDownPow2((u64)number); }
185
186 /// Returns the given number rounded up to the next multiple of n.
187 /** @param x The number to round up.
188         @param n The multiple to round x to. The value n must be a power-of-2. */
189 int RoundIntUpToMultipleOfPow2(int x, int n);
190 s64 RoundIntUpToMultipleOfPow2(s64 x, s64 n);
191
192 /// Raises the given base to an integral exponent.
193 /** @see Pow(), Exp(). */
194 float PowInt(float base, int exponent);
195 /// Raises the given base to a float exponent.
196 /** @see PowInt(), Exp(). */
197 float Pow(float base, float exponent);
198 /// Returns e (the constant 2.71828...) raised to the given power.
199 /** @see PowInt(), Pow(). */
200 float Exp(float exponent);
201 /// Computes a logarithm of the given value in the specified base.
202 /** @see Log2(), Ln(), Log10(). */
203 float Log(float base, float value);
204 /// Computes a logarithm in base-2.
205 /** @see Log(), Ln(), Log10(). */
206 float Log2(float value);
207 /// Computes a logarithm in the natural base (using e=2.71828... as the base)
208 /** @see Log(), Log2(), Log10(). */
209 float Ln(float value);
210 /// Computes a logarithm in base-10.
211 /** @see Log(), Log2(), Ln(). */
212 float Log10(float value);
213
214 /// Returns f rounded up to the next integer, as float.
215 /** @see CeilInt(), Floor(), FloorInt(), Round(), RoundInt(). */
216 float Ceil(float f);
217 /// Returns f rounded up to the next integer, as integer.
218 /** @see Ceil(), Floor(), FloorInt(), Round(), RoundInt(). */
219 int CeilInt(float f);
220 /// Returns f rounded down to the previous integer, as float.
221 /** @see Ceil(), CeilInt(), FloorInt(), Round(), RoundInt(). */
222 float Floor(float f);
223 /// Returns f rounded down to the previous integer, as integer.
224 /** @see Ceil(), CeilInt(), Floor(), Round(), RoundInt(). */
225 int FloorInt(float f);
226 /// Returns f rounded to the nearest integer, as float.
227 /** @see Ceil(), CeilInt(), Floor(), FloorInt(), RoundInt(). */
228 float Round(float f);
229 /// Returns f rounded to the nearest integer, as integer.
230 /** @see Ceil(), CeilInt(), Floor(), FloorInt(), Round(). */
231 int RoundInt(float f);
232
233 /// Returns -1 or 1 depending on the sign of f.
234 /** @see SignOrZero(). */
235 float Sign(float f);
236 /// Returns 0 if f is zero up to the given epsilon. Otherwise returns -1 or 1 depending on the sign of f.
237 /** @see Sign(). */
238 float SignOrZero(float f, float epsilon = 1e-8f);
239
240 /// Linearly interpolates between a and b.
241 /** @param t A value between [0,1].
242         @param a The first endpoint to lerp between.
243         @param b The second endpoint to lerp between.
244         @return This function computes a + t*(b-a). That is, if t==0, this function returns a. If t==1, this function returns b.
245                 Otherwise, the returned value linearly moves from a to b as t ranges from 0 to 1.
246         @see LerpMod(), InvLerp(), Step(), SmoothStep(), PingPongMod(), Mod(), ModPos(), Frac(). */
247 float Lerp(float a, float b, float t);
248 /// Linearly interpolates from a to b, under the modulus mod.
249 /** This function takes evaluates a and b in the range [0, mod] and takes the shorter path to reach from a to b.
250         @see Lerp(), InvLerp(), Step(), SmoothStep(), PingPongMod(), Mod(), ModPos(), Frac(). */
251 float LerpMod(float a, float b, float mod, float t);
252 /// Computes the lerp factor a and b have to be Lerp()ed to get x.
253 /** @see Lerp(), LerpMod(), Step(), SmoothStep(), PingPongMod(), Mod(), ModPos(), Frac(). */
254 float InvLerp(float a, float b, float x);
255 /// See http://msdn.microsoft.com/en-us/library/bb509665(v=VS.85).aspx
256 /** @see Lerp(), LerpMod(), InvLerp(), SmoothStep(), PingPongMod(), Mod(), ModPos(), Frac(). */
257 float Step(float y, float x);
258 /// See http://msdn.microsoft.com/en-us/library/bb509658(v=vs.85).aspx
259 /** @see Lerp(), LerpMod(), InvLerp(), Step(), PingPongMod(), Mod(), ModPos(), Frac(). */
260 float SmoothStep(float min, float max, float x);
261 /// Limits x to the range [0, mod], but instead of wrapping around from mod to 0, the result will move back
262 /// from mod to 0 as x goes from mod to 2*mod.
263 /** @see Lerp(), LerpMod(), InvLerp(), Step(), SmoothStep(), Mod(), ModPos(), Frac(). */
264 float PingPongMod(float x, float mod);
265 /// Computes a floating-point modulus.
266 /// This function returns a value in the range ]-mod, mod[.
267 /** @see Lerp(), LerpMod(), InvLerp(), Step(), SmoothStep(), PingPongMod(), ModPos(), Frac(). */
268 float Mod(float x, float mod);
269 /// Computes a floating-point modulus using an integer as the modulus.
270 float Mod(float x, int mod);
271 /// Computes a floating-point modulus, but restricts the output to the range [0, mod[.
272 /** @see Lerp(), LerpMod(), InvLerp(), Step(), SmoothStep(), PingPongMod(), Mod(), Frac(). */
273 float ModPos(float x, float mod);
274 /// Computes a floating-point modulus, but restricts the output to the range [0, mod[.
275 float ModPos(float x, int mod);
276 /// Returns the fractional part of x.
277 /** @see Lerp(), LerpMod(), InvLerp(), Step(), SmoothStep(), PingPongMod(), Mod(), ModPos(). */
278 float Frac(float x);
279
280 /// Returns the square root of x.
281 FORCE_INLINE float Sqrt(float x)
282 {
283 #ifdef MATH_NEON
284         float result;
285         asm("vsqrt.f32 %0, %1" : "=w"(result) : "w"(x));
286         return result;
287 #elif defined(MATH_SSE)
288         return s4f_x(_mm_sqrt_ss(setx_ps(x)));
289 #else
290         return sqrtf(x);
291 #endif
292 }
293
294 /// Computes a fast approximation of the square root of x.
295 FORCE_INLINE float SqrtFast(float x)
296 {
297 #ifdef MATH_NEON
298         float result;
299         asm("vsqrt.f32 %0, %1" : "=w"(result) : "w"(x));
300         return result;
301 #elif defined(MATH_SSE)
302         simd4f X = setx_ps(x);
303         return s4f_x(_mm_mul_ss(X, _mm_rsqrt_ss(X)));
304 #else
305         return sqrtf(x);
306 #endif
307 }
308
309 /// Returns 1/Sqrt(x). (The reciprocal of the square root of x)
310 FORCE_INLINE float RSqrt(float x)
311 {
312 #ifdef MATH_NEON
313         // Note: This is a two-wide operation - there is no scalar reciprocal sqrt instruction in ARM/VFP/NEON.
314         float32x2_t X = vdup_n_f32(x);
315         float32x2_t e = vrsqrte_f32(X);
316         e = vmul_f32(e, vrsqrts_f32(X, vmul_f32(e, e)));
317         e = vmul_f32(e, vrsqrts_f32(X, vmul_f32(e, e)));
318         return vget_lane_f32(e, 0);
319 #elif defined(MATH_SSE)
320         simd4f X = setx_ps(x);
321         simd4f e = _mm_rsqrt_ss(X);
322
323         // Do one iteration of Newton-Rhapson:
324         // e_n = e + 0.5 * (e - x * e^3)
325         simd4f e3 = _mm_mul_ss(_mm_mul_ss(e,e), e);
326         simd4f half = _mm_set_ss(0.5f);
327
328         return s4f_x(_mm_add_ss(e, _mm_mul_ss(half, _mm_sub_ss(e, _mm_mul_ss(X, e3)))));
329 #else
330         return 1.f / sqrtf(x);
331 #endif
332 }
333
334 /// SSE implementation of reciprocal square root.
335 FORCE_INLINE float RSqrtFast(float x)
336 {
337 #ifdef MATH_NEON
338         // Note: This is a two-wide operation, but perhaps it needn't be?
339         float32x2_t X = vdup_n_f32(x);
340         return vget_lane_f32(vrsqrte_f32(X), 0);
341 #elif defined(MATH_SSE)
342         return s4f_x(_mm_rsqrt_ss(setx_ps(x)));
343 #else
344         return 1.f / sqrtf(x);
345 #endif
346 }
347
348 /// Returns 1/x, the reciprocal of x.
349 FORCE_INLINE float Recip(float x)
350 {
351 #ifdef MATH_NEON
352         // Note: This is a two-wide operation - there is no scalar reciprocal instruction in ARM/VFP/NEON.
353         float32x2_t X = vdup_n_f32(x);
354         float32x2_t e = vrecpe_f32(X);
355         e = vmul_f32(e, vrecps_f32(X, e));
356         e = vmul_f32(e, vrecps_f32(X, e));
357         return vget_lane_f32(e, 0);
358 #elif defined(MATH_SSE)
359         simd4f X = setx_ps(x);
360         simd4f e = _mm_rcp_ss(X);
361         // Do one iteration of Newton-Rhapson:
362         // e_n = 2*e - x*e^2
363         simd4f e2 = _mm_mul_ss(e,e);
364         return s4f_x(_mm_sub_ss(_mm_add_ss(e, e), _mm_mul_ss(X, e2)));
365 #else
366         return 1.f / x;
367 #endif
368 }
369
370 /// Returns 1/x, the reciprocal of x, using a fast approximation (SSE rcp instruction).
371 FORCE_INLINE float RecipFast(float x)
372 {
373 #ifdef MATH_NEON
374         // Note: This is a two-wide operation, but perhaps it needn't be?
375         return vget_lane_f32(vrecpe_f32(vdup_n_f32(x)), 0);
376 #elif defined(MATH_SIMD)
377         return s4f_x(_mm_rcp_ss(setx_ps(x)));
378 #else
379         return 1.f / x;
380 #endif
381 }
382
383 /// Calculates n! at runtime. Use class FactorialT<N> to evaluate factorials at compile-time.
384 int Factorial(int n);
385
386 /// Calculates (N nCr K) at runtime with recursion, running time is exponential to n.
387 /// Use class Combinatorial<N, K> to evaluate combinatorials at compile-time.
388 int CombinatorialRec(int n, int k);
389
390 /// Calculates (N nCr K) at runtime, running time is proportional to n*k.
391 /// Use class Combinatorial<N, K> to evaluate combinatorials at compile-time.
392 int CombinatorialTab(int n, int k);
393
394 /// Clamps the given input value to the range [min, max].
395 /** @see Clamp01(), Min(), Max(). */
396 template<typename T>
397 inline T Clamp(const T &val, const T &floor, const T &ceil)
398 {
399         assume(floor <= ceil);
400         return val <= ceil ? (val >= floor ? val : floor) : ceil;
401 }
402
403 /// Clamps the given input value to the range [0, 1].
404 /** @see Clamp(), Min(), Max(). */
405 template<typename T>
406 inline T Clamp01(const T &val) { return Clamp(val, T(0), T(1)); }
407
408 /// Computes the smaller of two values.
409 /** @see Clamp(), Clamp01(), Max(). */
410 template<typename T>
411 inline T Min(const T &a, const T &b)
412 {
413         return a <= b ? a : b;
414 }
415
416 /// Computes the larger of two values.
417 /** @see Clamp(), Clamp01(), Min(). */
418 template<typename T>
419 inline T Max(const T &a, const T &b)
420 {
421         return a >= b ? a : b;
422 }
423
424 template<>
425 inline float Max(const float &a, const float &b)
426 {
427 #ifdef MATH_SSE
428         return s4f_x(_mm_max_ss(setx_ps(a), setx_ps(b)));
429 #else
430         return a >= b ? a : b;
431 #endif
432 }
433
434 /// Computes the smallest of three values.
435 /** @see Clamp(), Clamp01(), Max(). */
436 template<typename T>
437 inline T Min(const T &a, const T &b, const T &c)
438 {
439         return Min(Min(a, b), c);
440 }
441
442 template<>
443 inline float Min(const float &a, const float &b)
444 {
445 #ifdef MATH_SSE
446         return s4f_x(_mm_min_ss(setx_ps(a), setx_ps(b)));
447 #else
448         return a <= b ? a : b;
449 #endif
450 }
451
452 /// Computes the largest of three values.
453 /** @see Clamp(), Clamp01(), Min(). */
454 template<typename T>
455 inline T Max(const T &a, const T &b, const T &c)
456 {
457         return Max(Max(a, b), c);
458 }
459
460 /// Computes the smallest of four values.
461 /** @see Clamp(), Clamp01(), Max(). */
462 template<typename T>
463 inline T Min(const T &a, const T &b, const T &c, const T &d)
464 {
465         return Min(Min(a, b), Min(c, d));
466 }
467
468 /// Computes the largest of four values.
469 /** @see Clamp(), Clamp01(), Min(). */
470 template<typename T>
471 inline T Max(const T &a, const T &b, const T &c, const T &d)
472 {
473         return Max(Max(a, b), Max(c, d));
474 }
475
476 /// Swaps the two values.
477 template<typename T>
478 inline void Swap(T &a, T &b)
479 {
480         T temp = a;
481         a = b;
482         b = temp;
483 }
484
485 /** @return True if a > b. */
486 template<typename T>
487 inline bool GreaterThan(const T &a, const T &b)
488 {
489         return a > b;
490 }
491
492 /** @return True if a < b. */
493 template<typename T>
494 inline bool LessThan(const T &a, const T &b)
495 {
496         return a < b;
497 }
498
499 /** @return The absolute value of a. */
500 template<typename T>
501 inline T Abs(const T &a)
502 {
503         return a >= 0 ? a : -a;
504 }
505
506 template<>
507 inline float Abs(const float &a)
508 {
509 #ifdef MATH_SSE
510         return s4f_x(abs_ps(setx_ps(a)));
511 #else
512         return a >= 0 ? a : -a;
513 #endif
514 }
515
516 /// @return True if a and b are equal, using operator ==().
517 template<typename T>
518 FORCE_INLINE bool Equal(const T &a, const T &b)
519 {
520         return a == b;
521 }
522
523 /** Compares the two values for equality up to a small epsilon. */
524 template<> bool FORCE_INLINE Equal(const float &a, const float &b) { return Abs(a-b) <= eps; }
525 template<> bool FORCE_INLINE Equal(const double &a, const double &b) { return Abs(a-b) <= eps; }
526 #ifndef EMSCRIPTEN // long double is not supported.
527 template<> bool FORCE_INLINE Equal(const long double &a, const long double &b) { return Abs(a-b) <= eps; }
528 #endif
529
530 /** Compares the two values for equality, allowing the given amount of absolute error. */
531 bool EqualAbs(float a, float b, float epsilon = 1e-4f);
532
533 /// Computes the relative error of the two variables.
534 float RelativeError(float a, float b);
535
536 /** Compares the two values for equality, allowing the given amount of relative error.
537         Beware that for values very near 0, the relative error is significant. */
538 bool EqualRel(float a, float b, float maxRelError = 1e-4f);
539
540 /** Compares two floats interpreted as integers, see
541         http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
542         Warning: This comparison is not safe with NANs or INFs. */
543 bool EqualUlps(float a, float b, int maxUlps = 10000);
544
545 /// Returns true if the given value is not an inf or a nan.
546 template<typename T> FORCE_INLINE bool IsFinite(T /*value*/) { return true; }
547
548 template<> FORCE_INLINE bool IsFinite<float>(float f) { return (ReinterpretAsU32(f) << 1) < 0xFF000000u; }
549 template<> FORCE_INLINE bool IsFinite<double>(double d) { return (ReinterpretAsU64(d) << 1) < 0xFFE0000000000000ULL; }
550
551 /// Returns true if the given value is a not-a-number.
552 FORCE_INLINE bool IsNan(float f) { return (ReinterpretAsU32(f) << 1) > 0xFF000000u; }
553 FORCE_INLINE bool IsNan(double d) { return (ReinterpretAsU64(d) << 1) > 0xFFE0000000000000ULL; }
554
555 /// Returns true if the given value is +inf or -inf.
556 FORCE_INLINE bool IsInf(float f) { return (ReinterpretAsU32(f) << 1) == 0xFF000000u; }
557 FORCE_INLINE bool IsInf(double d) { return (ReinterpretAsU64(d) << 1) == 0xFFE0000000000000ULL; }
558
559 #ifdef _MSC_VER
560 template<> FORCE_INLINE bool IsFinite<long double>(long double value) { return _finite((double)value) != 0; }
561 FORCE_INLINE bool IsInf(long double value) { return IsInf((double)value); }
562 FORCE_INLINE bool IsNan(long double value) { return IsNan((double)value); }
563 #elif !defined(EMSCRIPTEN) // long double is not supported.
564 //template<> FORCE_INLINE bool IsFinite<long double>(long double value) { asserteq(sizeof(long double), 16); u64 val[2]; memcpy(val, &value, sizeof(u64)*2); return (val[1] & 0x7FFF) != 0x7FFF || val[0] < 0x8000000000000000ULL; }
565 //FORCE_INLINE bool IsInf(long double value) { asserteq(sizeof(long double), 16); u64 val[2]; memcpy(val, &value, sizeof(u64)*2); return (val[1] & 0x7FFF) == 0x7FFF && val[0] == 0x8000000000000000ULL; }
566 //FORCE_INLINE bool IsNan(long double value) { asserteq(sizeof(long double), 16); u64 val[2]; memcpy(val, &value, sizeof(u64)*2); return (val[1] & 0x7FFF) == 0x7FFF && val[0] >  0x8000000000000000ULL; }
567 template<> FORCE_INLINE bool IsFinite<long double>(long double value) { return IsFinite<double>((double)value); }
568 FORCE_INLINE bool IsInf(long double value) { return IsInf((double)value); }
569 FORCE_INLINE bool IsNan(long double value) { return IsNan((double)value); }
570 #endif
571
572 /// Serializes a float to a string.
573 char *SerializeFloat(float f, char *dstStr);
574
575 /// Deserializes a float from the given string.
576 /** @param str The source string buffer to deserialize. If this is a null pointer or an empty string, then NaN is returned.
577         @param outEndStr [out] Optional. If present, a pointer to the string position where reading ended is outputted. You can use this pointer
578                 to advance to reading a next element in a sequence of multiple serialized entries. */
579 float DeserializeFloat(const char *str, const char **outEndStr = 0);
580
581 /// Deserializes a double from the given string.
582 /** @param str The source string buffer to deserialize. If this is a null pointer or an empty string, then NaN is returned.
583         @param outEndStr [out] Optional. If present, a pointer to the string position where reading ended is outputted. You can use this pointer
584                 to advance to reading a next element in a sequence of multiple serialized entries. */
585 double DeserializeDouble(const char *str, const char **outEndStr = 0);
586
587 // A deserialization helper.
588 #define MATH_SKIP_WORD(str, word) if (!strncmp(str, word, strlen(word))) str += strlen(word);
589 #define MATH_NEXT_WORD_IS(str, word) !strncmp(str, word, strlen(word))
590
591 MATH_END_NAMESPACE