23 #ifndef LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_ 
   24 #define LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_ 
   31 #if defined(NO_SIMD) || !defined(__x86_64__) 
   34     #include <x86intrin.h> 
   46 double const ST_MAX[31] = {
 
   82 double const GRID_SCALE[31] = {
 
  118 double const FACE_SCALE[31] = {
 
  142     2.384185791015625e-7,
 
  143     1.1920928955078125e-7,
 
  144     5.9604644775390625e-8,
 
  145     2.98023223876953125e-8,
 
  146     1.490116119384765625e-8,
 
  147     7.450580596923828125e-9,
 
  148     3.7252902984619140625e-9,
 
  149     1.86264514923095703125e-9,
 
  224 #if defined(NO_SIMD) || !defined(__x86_64__) 
  226     int faceNumber(UnitVector3d 
const & v, uint8_t 
const (&faceNumbers)[64]) {
 
  227         int index = ((v.x() >  v.y()) << 5) +
 
  228                     ((v.x() > -v.y()) << 4) +
 
  229                     ((v.x() >  v.z()) << 3) +
 
  230                     ((v.x() > -v.z()) << 2) +
 
  231                     ((v.y() >  v.z()) << 1) +
 
  233         return faceNumbers[index];
 
  236     UnitVector3d faceToSphere(
int face,
 
  239                               uint8_t 
const (&faceComponents)[6][4],
 
  240                               double const (&faceConstants)[6][4])
 
  243         double d = u * u + v * v;
 
  245         p[faceComponents[face][0]] = (u * faceConstants[face][0]) / n;
 
  246         p[faceComponents[face][1]] = (v * faceConstants[face][1]) / n;
 
  247         p[faceComponents[face][2]] = faceConstants[face][2] / n;
 
  252         double const gridScale = GRID_SCALE[level];
 
  253         double const stMax = ST_MAX[level];
 
  254         double s = u * gridScale + gridScale;
 
  255         double t = v * gridScale + gridScale;
 
  259                                static_cast<int32_t
>(t));
 
  263         double const faceScale = FACE_SCALE[level];
 
  265                                static_cast<double>(t) * faceScale - 1.0);
 
  270             u * (1.3333333333333333 - 0.3333333333333333 * 
std::fabs(u)),
 
  271             v * (1.3333333333333333 - 0.3333333333333333 * 
std::fabs(v))
 
  284     int faceNumber(UnitVector3d 
const & v, uint8_t 
const (&faceNumbers)[64]) {
 
  285         __m128d 
const m00 = _mm_set_pd(0.0, -0.0);
 
  286         __m128d xx = _mm_set1_pd(v.x());
 
  287         __m128d yy = _mm_set1_pd(v.y());
 
  288         __m128d zz = _mm_set1_pd(v.z());
 
  289         __m128d myy = _mm_xor_pd(yy, m00);
 
  290         __m128d mzz = _mm_xor_pd(zz, m00);
 
  291         int index = (_mm_movemask_pd(_mm_cmpgt_pd(xx, myy)) << 4) +
 
  292                     (_mm_movemask_pd(_mm_cmpgt_pd(xx, mzz)) << 2) +
 
  293                      _mm_movemask_pd(_mm_cmpgt_pd(yy, mzz));
 
  294         return faceNumbers[index];
 
  297     UnitVector3d faceToSphere(
int face,
 
  299                               uint8_t 
const (&faceComponents)[6][4],
 
  300                               double const (&faceConstants)[6][4])
 
  303         __m128d 
norm = _mm_mul_pd(uv, uv);
 
  308                 _mm_add_sd(
norm, _mm_unpackhi_pd(
norm, _mm_setzero_pd()))
 
  311         __m128d 
w = _mm_set_sd(faceConstants[face][2]);
 
  314             _mm_div_pd(uv, _mm_shuffle_pd(
norm, 
norm, 0)),
 
  315             _mm_load_pd(&faceConstants[face][0])
 
  317         _mm_store_sd(&p[faceComponents[face][0]], uv);
 
  318         _mm_storeh_pd(&p[faceComponents[face][1]], uv);
 
  319         _mm_store_sd(&p[faceComponents[face][2]], 
w);
 
  323     __m128i faceToGrid(
int level, __m128d uv) {
 
  324         __m128d 
const gridScale = _mm_set1_pd(GRID_SCALE[level]);
 
  325         __m128d 
const stMax = _mm_set1_pd(ST_MAX[level]);
 
  326         __m128d st = _mm_add_pd(_mm_mul_pd(uv, gridScale), gridScale);
 
  327         st = _mm_min_pd(_mm_max_pd(st, _mm_setzero_pd()), stMax);
 
  328         return _mm_shuffle_epi32(_mm_cvttpd_epi32(st), 0x98);
 
  331     __m128d gridToFace(
int level, __m128i st) {
 
  332         __m128d 
const faceScale = _mm_set1_pd(FACE_SCALE[level]);
 
  333         __m128d xy = _mm_cvtepi32_pd(_mm_shuffle_epi32(st, 8));
 
  334         return _mm_sub_pd(_mm_mul_pd(xy, faceScale), _mm_set1_pd(1.0));
 
  337     __m128d atanApprox(__m128d uv) {
 
  338         __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
 
  342                 _mm_set1_pd(1.3333333333333333),
 
  343                 _mm_mul_pd(_mm_set1_pd(0.3333333333333333), abs_uv)
 
  348     __m128d atanApproxInverse(__m128d uv) {
 
  349         __m128d signbits = _mm_and_pd(uv, _mm_set_pd(-0.0, -0.0));
 
  350         __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
 
  351         __m128d tmp = _mm_sub_pd(_mm_set1_pd(4.0),
 
  352                                  _mm_mul_pd(_mm_set1_pd(3.0), abs_uv));
 
  353         uv = _mm_sub_pd(_mm_set1_pd(2.0), _mm_sqrt_pd(tmp));
 
  354         return _mm_or_pd(signbits, uv);
 
  362 #endif // LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_