23 #ifndef LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
24 #define LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
31 #if defined(NO_SIMD) || !defined(__x86_64__)
34 #include <x86intrin.h>
46 double const ST_MAX[31] = {
82 double const GRID_SCALE[31] = {
118 double const FACE_SCALE[31] = {
142 2.384185791015625e-7,
143 1.1920928955078125e-7,
144 5.9604644775390625e-8,
145 2.98023223876953125e-8,
146 1.490116119384765625e-8,
147 7.450580596923828125e-9,
148 3.7252902984619140625e-9,
149 1.86264514923095703125e-9,
224 #if defined(NO_SIMD) || !defined(__x86_64__)
226 int faceNumber(UnitVector3d
const & v, uint8_t
const (&faceNumbers)[64]) {
227 int index = ((v.x() > v.y()) << 5) +
228 ((v.x() > -v.y()) << 4) +
229 ((v.x() > v.z()) << 3) +
230 ((v.x() > -v.z()) << 2) +
231 ((v.y() > v.z()) << 1) +
233 return faceNumbers[index];
236 UnitVector3d faceToSphere(
int face,
239 uint8_t
const (&faceComponents)[6][4],
240 double const (&faceConstants)[6][4])
243 double d = u * u + v * v;
245 p[faceComponents[face][0]] = (u * faceConstants[face][0]) / n;
246 p[faceComponents[face][1]] = (v * faceConstants[face][1]) / n;
247 p[faceComponents[face][2]] = faceConstants[face][2] / n;
252 double const gridScale = GRID_SCALE[level];
253 double const stMax = ST_MAX[level];
254 double s = u * gridScale + gridScale;
255 double t = v * gridScale + gridScale;
259 static_cast<int32_t
>(t));
263 double const faceScale = FACE_SCALE[level];
265 static_cast<double>(t) * faceScale - 1.0);
270 u * (1.3333333333333333 - 0.3333333333333333 *
std::fabs(u)),
271 v * (1.3333333333333333 - 0.3333333333333333 *
std::fabs(v))
284 int faceNumber(UnitVector3d
const & v, uint8_t
const (&faceNumbers)[64]) {
285 __m128d
const m00 = _mm_set_pd(0.0, -0.0);
286 __m128d xx = _mm_set1_pd(v.x());
287 __m128d yy = _mm_set1_pd(v.y());
288 __m128d zz = _mm_set1_pd(v.z());
289 __m128d myy = _mm_xor_pd(yy, m00);
290 __m128d mzz = _mm_xor_pd(zz, m00);
291 int index = (_mm_movemask_pd(_mm_cmpgt_pd(xx, myy)) << 4) +
292 (_mm_movemask_pd(_mm_cmpgt_pd(xx, mzz)) << 2) +
293 _mm_movemask_pd(_mm_cmpgt_pd(yy, mzz));
294 return faceNumbers[index];
297 UnitVector3d faceToSphere(
int face,
299 uint8_t
const (&faceComponents)[6][4],
300 double const (&faceConstants)[6][4])
303 __m128d
norm = _mm_mul_pd(uv, uv);
308 _mm_add_sd(
norm, _mm_unpackhi_pd(
norm, _mm_setzero_pd()))
311 __m128d
w = _mm_set_sd(faceConstants[face][2]);
314 _mm_div_pd(uv, _mm_shuffle_pd(
norm,
norm, 0)),
315 _mm_load_pd(&faceConstants[face][0])
317 _mm_store_sd(&p[faceComponents[face][0]], uv);
318 _mm_storeh_pd(&p[faceComponents[face][1]], uv);
319 _mm_store_sd(&p[faceComponents[face][2]],
w);
323 __m128i faceToGrid(
int level, __m128d uv) {
324 __m128d
const gridScale = _mm_set1_pd(GRID_SCALE[level]);
325 __m128d
const stMax = _mm_set1_pd(ST_MAX[level]);
326 __m128d st = _mm_add_pd(_mm_mul_pd(uv, gridScale), gridScale);
327 st = _mm_min_pd(_mm_max_pd(st, _mm_setzero_pd()), stMax);
328 return _mm_shuffle_epi32(_mm_cvttpd_epi32(st), 0x98);
331 __m128d gridToFace(
int level, __m128i st) {
332 __m128d
const faceScale = _mm_set1_pd(FACE_SCALE[level]);
333 __m128d xy = _mm_cvtepi32_pd(_mm_shuffle_epi32(st, 8));
334 return _mm_sub_pd(_mm_mul_pd(xy, faceScale), _mm_set1_pd(1.0));
337 __m128d atanApprox(__m128d uv) {
338 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
342 _mm_set1_pd(1.3333333333333333),
343 _mm_mul_pd(_mm_set1_pd(0.3333333333333333), abs_uv)
348 __m128d atanApproxInverse(__m128d uv) {
349 __m128d signbits = _mm_and_pd(uv, _mm_set_pd(-0.0, -0.0));
350 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
351 __m128d tmp = _mm_sub_pd(_mm_set1_pd(4.0),
352 _mm_mul_pd(_mm_set1_pd(3.0), abs_uv));
353 uv = _mm_sub_pd(_mm_set1_pd(2.0), _mm_sqrt_pd(tmp));
354 return _mm_or_pd(signbits, uv);
This file declares a class for representing unit vectors in ℝ³.
static UnitVector3d fromNormalized(Vector3d const &v)
fromNormalized returns the unit vector equal to v, which is assumed to be normalized.
A base class for image defects.