1 #ifndef VARIANT2_INT_SQRT_H 2 #define VARIANT2_INT_SQRT_H 7 #define VARIANT2_INTEGER_MATH_SQRT_STEP_SSE2() \ 9 const __m128i exp_double_bias = _mm_set_epi64x(0, 1023ULL << 52); \ 10 __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(sqrt_input >> 12), exp_double_bias)); \ 11 x = _mm_sqrt_sd(_mm_setzero_pd(), x); \ 12 sqrt_result = (uint64_t)(_mm_cvtsi128_si64(_mm_sub_epi64(_mm_castpd_si128(x), exp_double_bias))) >> 19; \ 15 #define VARIANT2_INTEGER_MATH_SQRT_STEP_FP64() \ 17 sqrt_result = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \ 20 #define VARIANT2_INTEGER_MATH_SQRT_STEP_REF() \ 21 sqrt_result = integer_square_root_v2(sqrt_input) 51 for (
uint64_t bit = 1ULL << 60; bit; bit >>= 2)
53 const bool b = (n < r + bit);
54 const uint64_t n_next = n - (r + bit);
61 return r * 2 + ((n > r) ? 1 : 0);
/*
 * Adjusts `r` by -1, 0 or +1 in place, based on two comparisons against a
 * variable named `sqrt_input` that must be in scope where the macro is
 * expanded.
 *
 * With s = r >> 1 and b = r & 1, the macro forms
 *     r2 = s * (s + b) + (r << 32)
 * and then
 *   - subtracts 1 from r when r2 + b > sqrt_input,
 *   - adds 1 to r when r2 + 2^32 < sqrt_input - s.
 * All arithmetic is unsigned 64-bit and wraps on overflow.
 *
 * `r` is evaluated several times and shifted left by 32, so it must be a
 * side-effect-free 64-bit lvalue.
 * NOTE(review): presumably `r` is an estimate of
 * sqrt(2^64 + sqrt_input) * 2 - 2^33 that is off by at most one -- confirm
 * with the callers.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and its temporaries stay out of the caller's scope.
 */
#define VARIANT2_INTEGER_MATH_SQRT_FIXUP(r) \
  do { \
    const uint64_t s = (r) >> 1; \
    const uint64_t b = (r) & 1; \
    const uint64_t r2 = (uint64_t)(s) * (s + b) + ((r) << 32); \
    (r) += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
  } while (0)
unsigned __int64 uint64_t