LSST Applications g070148d5b3+33e5256705,g0d53e28543+25c8b88941,g0da5cf3356+2dd1178308,g1081da9e2a+62d12e78cb,g17e5ecfddb+7e422d6136,g1c76d35bf8+ede3a706f7,g295839609d+225697d880,g2e2c1a68ba+cc1f6f037e,g2ffcdf413f+853cd4dcde,g38293774b4+62d12e78cb,g3b44f30a73+d953f1ac34,g48ccf36440+885b902d19,g4b2f1765b6+7dedbde6d2,g5320a0a9f6+0c5d6105b6,g56b687f8c9+ede3a706f7,g5c4744a4d9+ef6ac23297,g5ffd174ac0+0c5d6105b6,g6075d09f38+66af417445,g667d525e37+2ced63db88,g670421136f+2ced63db88,g71f27ac40c+2ced63db88,g774830318a+463cbe8d1f,g7876bc68e5+1d137996f1,g7985c39107+62d12e78cb,g7fdac2220c+0fd8241c05,g96f01af41f+368e6903a7,g9ca82378b8+2ced63db88,g9d27549199+ef6ac23297,gabe93b2c52+e3573e3735,gb065e2a02a+3dfbe639da,gbc3249ced9+0c5d6105b6,gbec6a3398f+0c5d6105b6,gc9534b9d65+35b9f25267,gd01420fc67+0c5d6105b6,geee7ff78d7+a14128c129,gf63283c776+ede3a706f7,gfed783d017+0c5d6105b6,w.2022.47
LSST Data Management Base Package
Loading...
Searching...
No Matches
Q3cPixelizationImpl.h
Go to the documentation of this file.
1/*
2 * LSST Data Management System
3 * Copyright 2016 AURA/LSST.
4 *
5 * This product includes software developed by the
6 * LSST Project (http://www.lsst.org/).
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the LSST License Statement and
19 * the GNU General Public License along with this program. If not,
20 * see <https://www.lsstcorp.org/LegalNotices/>.
21 */
22
23#ifndef LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
24#define LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
25
29
30#include <cstdint>
31#if defined(NO_SIMD) || !defined(__x86_64__)
32 #include <tuple>
33#else
34 #include <x86intrin.h>
35#endif
36
38
39
40namespace lsst {
41namespace sphgeom {
42namespace {
43
44// LUT that provides the maximum grid coordinate value M in terms of
45// the subdivision level L; M = 2^L - 1.
46double const ST_MAX[31] = {
47 0.0,
48 1.0,
49 3.0,
50 7.0,
51 15.0,
52 31.0,
53 63.0,
54 127.0,
55 255.0,
56 511.0,
57 1023.0,
58 2047.0,
59 4095.0,
60 8191.0,
61 16383.0,
62 32767.0,
63 65535.0,
64 131071.0,
65 262143.0,
66 524287.0,
67 1048575.0,
68 2097151.0,
69 4194303.0,
70 8388607.0,
71 16777215.0,
72 33554431.0,
73 67108863.0,
74 134217727.0,
75 268435455.0,
76 536870911.0,
77 1073741823.0
78};
79
80// LUT that provides the face to grid coordinate scaling factor F in terms of
81// the subdivision level L; F = 2^(L - 1).
82double const GRID_SCALE[31] = {
83 0.5,
84 1.0,
85 2.0,
86 4.0,
87 8.0,
88 16.0,
89 32.0,
90 64.0,
91 128.0,
92 256.0,
93 512.0,
94 1024.0,
95 2048.0,
96 4096.0,
97 8192.0,
98 16384.0,
99 32768.0,
100 65536.0,
101 131072.0,
102 262144.0,
103 524288.0,
104 1048576.0,
105 2097152.0,
106 4194304.0,
107 8388608.0,
108 16777216.0,
109 33554432.0,
110 67108864.0,
111 134217728.0,
112 268435456.0,
113 536870912.0
114};
115
116// LUT that provides the grid to face coordinate scaling factor F in terms of
117// the subdivision level L; F = 2^(1 - L).
118double const FACE_SCALE[31] = {
119 2.0,
120 1.0,
121 0.5,
122 0.25,
123 0.125,
124 0.0625,
125 0.03125,
126 0.015625,
127 0.0078125,
128 0.00390625,
129 0.001953125,
130 0.0009765625,
131 0.00048828125,
132 0.000244140625,
133 0.0001220703125,
134 6.103515625e-5,
135 3.0517578125e-5,
136 1.52587890625e-5,
137 7.62939453125e-6,
138 3.814697265625e-6,
139 1.9073486328125e-6,
140 9.5367431640625e-7,
141 4.76837158203125e-7,
142 2.384185791015625e-7,
143 1.1920928955078125e-7,
144 5.9604644775390625e-8,
145 2.98023223876953125e-8,
146 1.490116119384765625e-8,
147 7.450580596923828125e-9,
148 3.7252902984619140625e-9,
149 1.86264514923095703125e-9,
150};
151
152// The functions below use a number of additional lookup tables
153// for performance.
154//
155// The first LUT, faceNumbers, is used to find the cube face that a unit
156// vector p belongs to. In particular, component comparisons are used to
157// build an index into the LUT. That index is computed as follows:
158//
159// int index = ((p.x() > p.y()) << 5) +
160// ((p.x() > -p.y()) << 4) +
161// ((p.x() > p.z()) << 3) +
162// ((p.x() > -p.z()) << 2) +
163// ((p.y() > p.z()) << 1) +
164// (p.y() > -p.z());
165//
166// and the Q3C (or modified-Q3C) face number is just:
167//
168// int face = faceNumbers[index];
169//
170// Compare this to something like:
171//
172// int face;
173// double a;
174// if (p.x() > -p.y()) {
175// if (p.x() > p.y()) {
176// face = 1;
177// a = p.x();
178// } else {
179// face = 2;
180// a = p.y();
181// }
182// } else {
183// if (p.x() > p.y()) {
184// face = 4;
185// a = -p.y();
186// } else {
187// face = 3;
188// a = -p.x();
189// }
190// }
191// if (p.z() > a) {
192// face = 0;
193// } else if (p.z() < -a) {
194// face = 5;
195// }
196//
197// In the latter case, the comparisons performed depend on the data, and the
198// branches involved will be hard to predict for random input. The LUT-based
199// approach allows comparisons to be performed in parallel without speculation
200// by introducing some redundant computation. It replaces control dependencies
201// (branching) with data dependencies.
202//
203// The second LUT, faceComponents, maps from a face number F (0-5) to a
204// 4-element array A that contains the following quantities for a unit vector
205// V belonging to F:
206//
207// A[0]: index of the component of V corresponding to the face u coordinate
208// A[1]: index of the component of V corresponding to the face v coordinate
209// A[2]: index of the component of V with maximum absolute value
210// A[3]: unused (padding)
211//
212// Finally, the third LUT, faceConstants, maps from a face number F (0-5) to
213// an array of doubles A that contains the following quantities for a unit
214// vector V belonging to F:
215//
216// A[0]: scaling factor (±1) that, when multiplied by the component of V with
217// index faceComponents[face][0], yields the u coordinate of V
218// A[1]: scaling factor (±1) that, when multiplied by the component of V with
219// index faceComponents[face][1], yields the v coordinate of V
220// A[2]: value (±1) that has the same sign as the component of V with
221// maximum absolute value
222// A[3]: unused (padding)
223
224#if defined(NO_SIMD) || !defined(__x86_64__)
225
226 int faceNumber(UnitVector3d const & v, uint8_t const (&faceNumbers)[64]) {
227 int index = ((v.x() > v.y()) << 5) +
228 ((v.x() > -v.y()) << 4) +
229 ((v.x() > v.z()) << 3) +
230 ((v.x() > -v.z()) << 2) +
231 ((v.y() > v.z()) << 1) +
232 (v.y() > -v.z());
233 return faceNumbers[index];
234 }
235
236 UnitVector3d faceToSphere(int face,
237 double u,
238 double v,
239 uint8_t const (&faceComponents)[6][4],
240 double const (&faceConstants)[6][4])
241 {
242 double p[3];
243 double d = u * u + v * v;
244 double n = std::sqrt(1.0 + d);
245 p[faceComponents[face][0]] = (u * faceConstants[face][0]) / n;
246 p[faceComponents[face][1]] = (v * faceConstants[face][1]) / n;
247 p[faceComponents[face][2]] = faceConstants[face][2] / n;
248 return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
249 }
250
251 std::tuple<int32_t, int32_t> faceToGrid(int level, double u, double v) {
252 double const gridScale = GRID_SCALE[level];
253 double const stMax = ST_MAX[level];
254 double s = u * gridScale + gridScale;
255 double t = v * gridScale + gridScale;
256 s = std::max(std::min(s, stMax), 0.0);
257 t = std::max(std::min(t, stMax), 0.0);
258 return std::make_tuple(static_cast<int32_t>(s),
259 static_cast<int32_t>(t));
260 }
261
262 std::tuple<double, double> gridToFace(int level, int32_t s, int32_t t) {
263 double const faceScale = FACE_SCALE[level];
264 return std::make_tuple(static_cast<double>(s) * faceScale - 1.0,
265 static_cast<double>(t) * faceScale - 1.0);
266 }
267
268 std::tuple<double, double> atanApprox(double u, double v) {
269 return std::make_tuple(
270 u * (1.3333333333333333 - 0.3333333333333333 * std::fabs(u)),
271 v * (1.3333333333333333 - 0.3333333333333333 * std::fabs(v))
272 );
273 }
274
275 std::tuple<double, double> atanApproxInverse(double u, double v) {
276 return std::make_tuple(
277 std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(u)), u),
278 std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(v)), v)
279 );
280 }
281
282#else
283
284 int faceNumber(UnitVector3d const & v, uint8_t const (&faceNumbers)[64]) {
285 __m128d const m00 = _mm_set_pd(0.0, -0.0);
286 __m128d xx = _mm_set1_pd(v.x());
287 __m128d yy = _mm_set1_pd(v.y());
288 __m128d zz = _mm_set1_pd(v.z());
289 __m128d myy = _mm_xor_pd(yy, m00);
290 __m128d mzz = _mm_xor_pd(zz, m00);
291 int index = (_mm_movemask_pd(_mm_cmpgt_pd(xx, myy)) << 4) +
292 (_mm_movemask_pd(_mm_cmpgt_pd(xx, mzz)) << 2) +
293 _mm_movemask_pd(_mm_cmpgt_pd(yy, mzz));
294 return faceNumbers[index];
295 }
296
297 UnitVector3d faceToSphere(int face,
298 __m128d uv,
299 uint8_t const (&faceComponents)[6][4],
300 double const (&faceConstants)[6][4])
301 {
302 double p[3];
303 __m128d norm = _mm_mul_pd(uv, uv);
304 norm = _mm_sqrt_sd(
305 _mm_setzero_pd(),
306 _mm_add_sd(
307 _mm_set_sd(1.0),
308 _mm_add_sd(norm, _mm_unpackhi_pd(norm, _mm_setzero_pd()))
309 )
310 );
311 __m128d w = _mm_set_sd(faceConstants[face][2]);
312 w = _mm_div_sd(w, norm);
313 uv = _mm_mul_pd(
314 _mm_div_pd(uv, _mm_shuffle_pd(norm, norm, 0)),
315 _mm_load_pd(&faceConstants[face][0])
316 );
317 _mm_store_sd(&p[faceComponents[face][0]], uv);
318 _mm_storeh_pd(&p[faceComponents[face][1]], uv);
319 _mm_store_sd(&p[faceComponents[face][2]], w);
320 return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
321 }
322
323 __m128i faceToGrid(int level, __m128d uv) {
324 __m128d const gridScale = _mm_set1_pd(GRID_SCALE[level]);
325 __m128d const stMax = _mm_set1_pd(ST_MAX[level]);
326 __m128d st = _mm_add_pd(_mm_mul_pd(uv, gridScale), gridScale);
327 st = _mm_min_pd(_mm_max_pd(st, _mm_setzero_pd()), stMax);
328 return _mm_shuffle_epi32(_mm_cvttpd_epi32(st), 0x98);
329 }
330
331 __m128d gridToFace(int level, __m128i st) {
332 __m128d const faceScale = _mm_set1_pd(FACE_SCALE[level]);
333 __m128d xy = _mm_cvtepi32_pd(_mm_shuffle_epi32(st, 8));
334 return _mm_sub_pd(_mm_mul_pd(xy, faceScale), _mm_set1_pd(1.0));
335 }
336
337 __m128d atanApprox(__m128d uv) {
338 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
339 return _mm_mul_pd(
340 uv,
341 _mm_sub_pd(
342 _mm_set1_pd(1.3333333333333333),
343 _mm_mul_pd(_mm_set1_pd(0.3333333333333333), abs_uv)
344 )
345 );
346 }
347
348 __m128d atanApproxInverse(__m128d uv) {
349 __m128d signbits = _mm_and_pd(uv, _mm_set_pd(-0.0, -0.0));
350 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
351 __m128d tmp = _mm_sub_pd(_mm_set1_pd(4.0),
352 _mm_mul_pd(_mm_set1_pd(3.0), abs_uv));
353 uv = _mm_sub_pd(_mm_set1_pd(2.0), _mm_sqrt_pd(tmp));
354 return _mm_or_pd(signbits, uv);
355 }
356
357#endif
358
359} // unnamed namespace
360}} // namespace lsst::sphgeom
361
362#endif // LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
This file declares a class for representing unit vectors in ℝ³.
static UnitVector3d fromNormalized(Vector3d const &v)
fromNormalized returns the unit vector equal to v, which is assumed to be normalized.
Definition: UnitVector3d.h:82
T copysign(T... args)
T fabs(T... args)
T make_tuple(T... args)
T max(T... args)
T min(T... args)
T norm(const T &x)
Definition: Integrate.h:160
T sqrt(T... args)
double w
Definition: CoaddPsf.cc:69