1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #pragma once19 20 #include "[myassert.h]"21 #include <math.h>22 #include <cmath>23 #include <float.h>24 #include <string.h>25 26 #include "[MathTypes.h]"27 #include "[MathConstants.h]"28 #include "[float3.h]"29 #include "[Reinterpret.h]"30 #include "[SSEMath.h]"31 32 #ifdef MATH_NEON33 #include <arm_neon.h>34 #endif35 36 #include "[assume.h]"37 38 [MATH_BEGIN_NAMESPACE]39 40 41 42 43 44 #define DOT2(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1])45 46 47 48 49 50 #define DOT3(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2])51 52 53 54 55 56 #define ABSDOT3(v1, v2) (Abs((v1)[0] * (v2)[0]) + Abs((v1)[1] * (v2)[1]) + Abs((v1)[2] * (v2)[2]))57 58 59 60 61 62 63 64 #define DOT3_xyz(v1, x, y, z) ((v1)[0] * (x) + (v1)[1] * (y) + (v1)[2] * (z))65 66 67 68 69 70 71 72 #define DOT3STRIDED(v1, v2, stride) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[stride] + (v1)[2] * (v2)[2*stride])73 74 75 76 77 78 #define DOT4(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] + (v1)[3] * (v2)[3])79 80 81 82 83 84 85 86 #define DOT4STRIDED(v1, v2, stride) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[stride] + (v1)[2] * (v2)[2*stride] + (v1)[3] * (v2)[3*stride])87 88 89 90 91 92 #define DOT4POS(vec4D, vecPos) ((vec4D)[0] * (vecPos)[0] + (vec4D)[1] * (vecPos)[1] + (vec4D)[2] * (vecPos)[2] + (vec4D)[3])93 94 95 96 97 98 99 100 #define DOT4POS_xyz(vec4D, x, y, z) ((vec4D)[0] * (x) + (vec4D)[1] * (y) + (vec4D)[2] * (z) + (vec4D)[3])101 102 103 104 105 #define DOT4DIR(vec4D, vecDir) DOT3(vec4D, vecDir)106 107 108 109 110 #define DOT4DIR_xyz(vec4D, x, y, z) DOT3_xyz(vec4D, x, y, z)111 112 113 114 inline [float3] [DegToRad](const [float3] °rees) { return degrees * ([pi] / 180.f); }115 inline float [DegToRad](float degrees) { return degrees * ([pi] / 180.f); }116 117 118 inline [float3] [RadToDeg](const [float3] &radians) { return radians * (180.f / [pi]); }119 inline float [RadToDeg](float radians) { return radians * (180.f / [pi]); }120 121 122 123 float [Sin](float angleRadians);124 125 126 float [Cos](float angleRadians);127 128 129 float [Tan](float angleRadians);130 131 132 133 void [SinCos](float angleRadians, float &outSin, float &outCos);134 135 136 void [SinCos2](const [float4] &angleRadians, [float4] &outSin, [float4] &outCos);137 138 139 void [SinCos3](const [float4] &angleRadians, [float4] &outSin, [float4] &outCos);140 141 void [SinCos4](const [float4] &angleRadians, [float4] &outSin, [float4] &outCos);142 143 144 float [Asin](float x);145 146 147 float [Acos](float x);148 149 150 float [Atan](float x);151 152 153 float [Atan2](float y, float x);154 155 156 float [Sinh](float x);157 158 159 float [Cosh](float x);160 161 162 float [Tanh](float x);163 164 165 166 167 bool [IsPow2](u32 number);168 bool [IsPow2]([u64] number);169 [FORCE_INLINE] bool [IsPow2](int number) { [assert](number >= 0); return [IsPow2]((u32)number); }170 [FORCE_INLINE] bool [IsPow2]([s64] number) { [assert](number >= 0); return [IsPow2](([u64])number); }171 172 173 174 u32 [RoundUpPow2](u32 number);175 [u64] [RoundUpPow2]([u64] number);176 [FORCE_INLINE] int [RoundUpPow2](int number) { [assert](number >= 0); return (int )[RoundUpPow2]((u32)number); }177 [FORCE_INLINE] [s64] [RoundUpPow2]([s64] number) { [assert](number >= 0); return ([s64])[RoundUpPow2](([u64])number); }178 179 180 181 u32 [RoundDownPow2](u32 number);182 [u64] [RoundDownPow2]([u64] number);183 [FORCE_INLINE] int [RoundDownPow2](int number) { [assert](number >= 0); return (int )[RoundDownPow2]((u32)number); }184 [FORCE_INLINE] [s64] [RoundDownPow2]([s64] number) { [assert](number >= 0); return ([s64])[RoundDownPow2](([u64])number); }185 186 187 188 189 int [RoundIntUpToMultipleOfPow2](int x, int n);190 [s64] [RoundIntUpToMultipleOfPow2]([s64] x, [s64] n);191 192 193 194 float [PowInt](float base, int exponent);195 196 197 float [Pow](float base, float exponent);198 199 200 float [Exp](float exponent);201 202 203 float [Log](float base, float value);204 205 206 float [Log2](float value);207 208 209 float [Ln](float value);210 211 212 float [Log10](float value);213 214 215 216 float [Ceil](float f);217 218 219 int [CeilInt](float f);220 221 222 float [Floor](float f);223 224 225 int [FloorInt](float f);226 227 228 float [Round](float f);229 230 231 int [RoundInt](float f);232 233 234 235 float [Sign](float f);236 237 238 float [SignOrZero](float f, float [epsilon] = 1[e]-8f);239 240 241 242 243 244 245 246 247 float [Lerp](float a, float b, float t);248 249 250 251 float [LerpMod](float a, float b, float mod, float t);252 253 254 float [InvLerp](float a, float b, float x);255 256 257 float [Step](float y, float x);258 259 260 float [SmoothStep](float min, float max, float x);261 262 263 264 float [PingPongMod](float x, float mod);265 266 267 268 float [Mod](float x, float mod);269 270 float [Mod](float x, int mod);271 272 273 float [ModPos](float x, float mod);274 275 float [ModPos](float x, int mod);276 277 278 float [Frac](float x);279 280 281 [FORCE_INLINE] float [Sqrt](float x)282 {283 #ifdef MATH_NEON284 float result;285 asm("vsqrt.f32 %0, %1" : "=w"(result) : "w"(x));286 return result;287 #elif defined(MATH_SSE)288 return s4f_x(_mm_sqrt_ss(setx_ps(x)));289 #else290 return sqrtf(x);291 #endif292 }293 294 295 [FORCE_INLINE] float [SqrtFast](float x)296 {297 #ifdef MATH_NEON298 float result;299 asm("vsqrt.f32 %0, %1" : "=w"(result) : "w"(x));300 return result;301 #elif defined(MATH_SSE)302 simd4f X = setx_ps(x);303 return s4f_x(_mm_mul_ss(X, _mm_rsqrt_ss(X)));304 #else305 return sqrtf(x);306 #endif307 }308 309 310 [FORCE_INLINE] float [RSqrt](float x)311 {312 #ifdef MATH_NEON313 314 float32x2_t X = vdup_n_f32(x);315 float32x2_t [e] = vrsqrte_f32(X);316 e = vmul_f32(e, vrsqrts_f32(X, vmul_f32(e, e)));317 e = vmul_f32(e, vrsqrts_f32(X, vmul_f32(e, e)));318 return vget_lane_f32(e, 0);319 #elif defined(MATH_SSE)320 simd4f X = setx_ps(x);321 simd4f e = _mm_rsqrt_ss(X);322 323 324 325 simd4f e3 = _mm_mul_ss(_mm_mul_ss(e,e), e);326 simd4f half = _mm_set_ss(0.5f);327 328 return s4f_x(_mm_add_ss(e, _mm_mul_ss(half, _mm_sub_ss(e, _mm_mul_ss(X, e3)))));329 #else330 return 1.f / sqrtf(x);331 #endif332 }333 334 335 [FORCE_INLINE] float [RSqrtFast](float x)336 {337 #ifdef MATH_NEON338 339 float32x2_t X = vdup_n_f32(x);340 return vget_lane_f32(vrsqrte_f32(X), 0);341 #elif defined(MATH_SSE)342 return s4f_x(_mm_rsqrt_ss(setx_ps(x)));343 #else344 return 1.f / sqrtf(x);345 #endif346 }347 348 349 [FORCE_INLINE] float [Recip](float x)350 {351 #ifdef MATH_NEON352 353 float32x2_t X = vdup_n_f32(x);354 float32x2_t [e] = vrecpe_f32(X);355 e = vmul_f32(e, vrecps_f32(X, e));356 e = vmul_f32(e, vrecps_f32(X, e));357 return vget_lane_f32(e, 0);358 #elif defined(MATH_SSE)359 simd4f X = setx_ps(x);360 simd4f e = _mm_rcp_ss(X);361 362 363 simd4f [e2] = _mm_mul_ss(e,e);364 return s4f_x(_mm_sub_ss(_mm_add_ss(e, e), _mm_mul_ss(X, e2)));365 #else366 return 1.f / x;367 #endif368 }369 370 371 [FORCE_INLINE] float [RecipFast](float x)372 {373 #ifdef MATH_NEON374 375 return vget_lane_f32(vrecpe_f32(vdup_n_f32(x)), 0);376 #elif defined(MATH_SIMD)377 return s4f_x(_mm_rcp_ss(setx_ps(x)));378 #else379 return 1.f / x;380 #endif381 }382 383 384 int [Factorial](int n);385 386 387 388 int [CombinatorialRec](int n, int k);389 390 391 392 int [CombinatorialTab](int n, int k);393 394 395 396 template<typename T>397 inline T [Clamp](const T &val, const T &floor, const T &ceil)398 {399 [assume](floor <= ceil);400 return val <= ceil ? (val >= floor ? val : floor) : ceil;401 }402 403 404 405 template<typename T>406 inline T [Clamp01](const T &val) { return [Clamp](val, T(0), T(1)); }407 408 409 410 template<typename T>411 inline T [Min](const T &a, const T &b)412 {413 return a <= b ? a : b;414 }415 416 417 418 template<typename T>419 inline T [Max](const T &a, const T &b)420 {421 return a >= b ? a : b;422 }423 424 template<>425 inline float [Max](const float &a, const float &b)426 {427 #ifdef MATH_SSE428 return s4f_x(_mm_max_ss(setx_ps(a), setx_ps(b)));429 #else430 return a >= b ? a : b;431 #endif432 }433 434 435 436 template<typename T>437 inline T [Min](const T &a, const T &b, const T &c)438 {439 return [Min]([Min](a, b), c);440 }441 442 template<>443 inline float [Min](const float &a, const float &b)444 {445 #ifdef MATH_SSE446 return s4f_x(_mm_min_ss(setx_ps(a), setx_ps(b)));447 #else448 return a <= b ? a : b;449 #endif450 }451 452 453 454 template<typename T>455 inline T [Max](const T &a, const T &b, const T &c)456 {457 return [Max]([Max](a, b), c);458 }459 460 461 462 template<typename T>463 inline T [Min](const T &a, const T &b, const T &c, const T &[d])464 {465 return [Min]([Min](a, b), [Min](c, d));466 }467 468 469 470 template<typename T>471 inline T [Max](const T &a, const T &b, const T &c, const T &[d])472 {473 return [Max]([Max](a, b), [Max](c, d));474 }475 476 477 template<typename T>478 inline void [Swap](T &a, T &b)479 {480 T temp = a;481 a = b;482 b = temp;483 }484 485 486 template<typename T>487 inline bool [GreaterThan](const T &a, const T &b)488 {489 return a > b;490 }491 492 493 template<typename T>494 inline bool [LessThan](const T &a, const T &b)495 {496 return a < b;497 }498 499 500 template<typename T>501 inline T [Abs](const T &a)502 {503 return a >= 0 ? a : -a;504 }505 506 template<>507 inline float [Abs](const float &a)508 {509 #ifdef MATH_SSE510 return s4f_x(abs_ps(setx_ps(a)));511 #else512 return a >= 0 ? a : -a;513 #endif514 }515 516 517 template<typename T>518 [FORCE_INLINE] bool [Equal](const T &a, const T &b)519 {520 return a == b;521 }522 523 524 template<> bool [FORCE_INLINE] [Equal](const float &a, const float &b) { return [Abs](a-b) <= [eps]; }525 template<> bool [FORCE_INLINE] [Equal](const double &a, const double &b) { return [Abs](a-b) <= [eps]; }526 #ifndef EMSCRIPTEN // long double is not supported.527 template<> bool [FORCE_INLINE] [Equal](const long double &a, const long double &b) { return [Abs](a-b) <= [eps]; }528 #endif529 530 531 bool [EqualAbs](float a, float b, float [epsilon] = 1[e]-4f);532 533 534 float [RelativeError](float a, float b);535 536 537 538 bool [EqualRel](float a, float b, float maxRelError = 1[e]-4f);539 540 541 542 543 bool [EqualUlps](float a, float b, int maxUlps = 10000);544 545 546 template<typename T> [FORCE_INLINE] bool [IsFinite](T ) { return true; }547 548 template<> [FORCE_INLINE] bool [IsFinite<float>](float f) { return ([ReinterpretAsU32](f) << 1) < 0xFF000000u; }549 template<> [FORCE_INLINE] bool [IsFinite<double>](double [d]) { return ([ReinterpretAsU64]([d]) << 1) < 0xFFE0000000000000ULL; }550 551 552 [FORCE_INLINE] bool [IsNan](float f) { return ([ReinterpretAsU32](f) << 1) > 0xFF000000u; }553 [FORCE_INLINE] bool [IsNan](double [d]) { return ([ReinterpretAsU64](d) << 1) > 0xFFE0000000000000ULL; }554 555 556 [FORCE_INLINE] bool [IsInf](float f) { return ([ReinterpretAsU32](f) << 1) == 0xFF000000u; }557 [FORCE_INLINE] bool [IsInf](double [d]) { return ([ReinterpretAsU64](d) << 1) == 0xFFE0000000000000ULL; }558 559 #ifdef _MSC_VER560 template<> [FORCE_INLINE] bool [IsFinite<long double>](long double value) { return _finite((double)value) != 0; }561 [FORCE_INLINE] bool [IsInf](long double value) { return [IsInf]((double)value); }562 [FORCE_INLINE] bool [IsNan](long double value) { return [IsNan]((double)value); }563 #elif !defined(EMSCRIPTEN) // long double is not supported.564 565 566 567 template<> [FORCE_INLINE] bool [IsFinite<long double>](long double value) { return [IsFinite<double>]((double)value); }568 [FORCE_INLINE] bool [IsInf](long double value) { return [IsInf]((double)value); }569 [FORCE_INLINE] bool [IsNan](long double value) { return [IsNan]((double)value); }570 #endif571 572 573 char *[SerializeFloat](float f, char *dstStr);574 575 576 577 578 579 float [DeserializeFloat](const char *str, const char **outEndStr = 0);580 581 582 583 584 585 double [DeserializeDouble](const char *str, const char **outEndStr = 0);586 587 588 #define MATH_SKIP_WORD(str, word) if (!strncmp(str, word, strlen(word))) str += strlen(word);589 #define MATH_NEXT_WORD_IS(str, word) !strncmp(str, word, strlen(word))590 591 [MATH_END_NAMESPACE] Go back to previous page