LSST Applications g0f08755f38+82efc23009,g12f32b3c4e+e7bdf1200e,g1653933729+a8ce1bb630,g1a0ca8cf93+50eff2b06f,g28da252d5a+52db39f6a5,g2bbee38e9b+37c5a29d61,g2bc492864f+37c5a29d61,g2cdde0e794+c05ff076ad,g3156d2b45e+41e33cbcdc,g347aa1857d+37c5a29d61,g35bb328faa+a8ce1bb630,g3a166c0a6a+37c5a29d61,g3e281a1b8c+fb992f5633,g414038480c+7f03dfc1b0,g41af890bb2+11b950c980,g5fbc88fb19+17cd334064,g6b1c1869cb+12dd639c9a,g781aacb6e4+a8ce1bb630,g80478fca09+72e9651da0,g82479be7b0+04c31367b4,g858d7b2824+82efc23009,g9125e01d80+a8ce1bb630,g9726552aa6+8047e3811d,ga5288a1d22+e532dc0a0b,gae0086650b+a8ce1bb630,gb58c049af0+d64f4d3760,gc28159a63d+37c5a29d61,gcf0d15dbbd+2acd6d4d48,gd7358e8bfb+778a810b6e,gda3e153d99+82efc23009,gda6a2b7d83+2acd6d4d48,gdaeeff99f8+1711a396fd,ge2409df99d+6b12de1076,ge79ae78c31+37c5a29d61,gf0baf85859+d0a5978c5a,gf3967379c6+4954f8c433,gfb92a5be7c+82efc23009,gfec2e1e490+2aaed99252,w.2024.46
LSST Data Management Base Package
Loading...
Searching...
No Matches
Q3cPixelizationImpl.h
Go to the documentation of this file.
1/*
2 * This file is part of sphgeom.
3 *
4 * Developed for the LSST Data Management System.
5 * This product includes software developed by the LSST Project
6 * (http://www.lsst.org).
7 * See the COPYRIGHT file at the top-level directory of this distribution
8 * for details of code ownership.
9 *
10 * This software is dual licensed under the GNU General Public License and also
11 * under a 3-clause BSD license. Recipients may choose which of these licenses
12 * to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
13 * respectively. If you choose the GPL option then the following text applies
14 * (but note that there is still no warranty even if you opt for BSD instead):
15 *
16 * This program is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program. If not, see <http://www.gnu.org/licenses/>.
28 */
29
30#ifndef LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
31#define LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
32
36
37#include <cstdint>
38#if defined(NO_SIMD) || !defined(__x86_64__)
39 #include <tuple>
40#else
41 #include <x86intrin.h>
42#endif
43
45
46
47namespace lsst {
48namespace sphgeom {
49namespace {
50
51// LUT that provides the maximum grid coordinate value M in terms of
52// the subdivision level L; M = 2^L - 1.
53double const ST_MAX[31] = {
54 0.0,
55 1.0,
56 3.0,
57 7.0,
58 15.0,
59 31.0,
60 63.0,
61 127.0,
62 255.0,
63 511.0,
64 1023.0,
65 2047.0,
66 4095.0,
67 8191.0,
68 16383.0,
69 32767.0,
70 65535.0,
71 131071.0,
72 262143.0,
73 524287.0,
74 1048575.0,
75 2097151.0,
76 4194303.0,
77 8388607.0,
78 16777215.0,
79 33554431.0,
80 67108863.0,
81 134217727.0,
82 268435455.0,
83 536870911.0,
84 1073741823.0
85};
86
87// LUT that provides the face to grid coordinate scaling factor F in terms of
88// the subdivision level L; F = 2^(L - 1).
89double const GRID_SCALE[31] = {
90 0.5,
91 1.0,
92 2.0,
93 4.0,
94 8.0,
95 16.0,
96 32.0,
97 64.0,
98 128.0,
99 256.0,
100 512.0,
101 1024.0,
102 2048.0,
103 4096.0,
104 8192.0,
105 16384.0,
106 32768.0,
107 65536.0,
108 131072.0,
109 262144.0,
110 524288.0,
111 1048576.0,
112 2097152.0,
113 4194304.0,
114 8388608.0,
115 16777216.0,
116 33554432.0,
117 67108864.0,
118 134217728.0,
119 268435456.0,
120 536870912.0
121};
122
123// LUT that provides the grid to face coordinate scaling factor F in terms of
124// the subdivision level L; F = 2^(1 - L).
125double const FACE_SCALE[31] = {
126 2.0,
127 1.0,
128 0.5,
129 0.25,
130 0.125,
131 0.0625,
132 0.03125,
133 0.015625,
134 0.0078125,
135 0.00390625,
136 0.001953125,
137 0.0009765625,
138 0.00048828125,
139 0.000244140625,
140 0.0001220703125,
141 6.103515625e-5,
142 3.0517578125e-5,
143 1.52587890625e-5,
144 7.62939453125e-6,
145 3.814697265625e-6,
146 1.9073486328125e-6,
147 9.5367431640625e-7,
148 4.76837158203125e-7,
149 2.384185791015625e-7,
150 1.1920928955078125e-7,
151 5.9604644775390625e-8,
152 2.98023223876953125e-8,
153 1.490116119384765625e-8,
154 7.450580596923828125e-9,
155 3.7252902984619140625e-9,
156 1.86264514923095703125e-9,
157};
158
159// The functions below use a number of additional lookup tables
160// for performance.
161//
162// The first LUT, faceNumbers, is used to find the cube face that a unit
163// vector p belongs to. In particular, component comparisons are used to
164// build an index into the LUT. That index is computed as follows:
165//
166// int index = ((p.x() > p.y()) << 5) +
167// ((p.x() > -p.y()) << 4) +
168// ((p.x() > p.z()) << 3) +
169// ((p.x() > -p.z()) << 2) +
170// ((p.y() > p.z()) << 1) +
171// (p.y() > -p.z());
172//
173// and the Q3C (or modified-Q3C) face number is just:
174//
175// int face = faceNumbers[index];
176//
177// Compare this to something like:
178//
179// int face;
180// double a;
181// if (p.x() > -p.y()) {
182// if (p.x() > p.y()) {
183// face = 1;
184// a = p.x();
185// } else {
186// face = 2;
187// a = p.y();
188// }
189// } else {
190// if (p.x() > p.y()) {
191// face = 4;
192// a = -p.y();
193// } else {
194// face = 3;
195// a = -p.x();
196// }
197// }
198// if (p.z() > a) {
199// face = 0;
200// } else if (p.z() < -a) {
201// face = 5;
202// }
203//
204// In the latter case, the comparisons performed depend on the data, and the
205// branches involved will be hard to predict for random input. The LUT-based
206// approach allows comparisons to be performed in parallel without speculation
207// by introducing some redundant computation. It replaces control dependencies
208// (branching) with data dependencies.
209//
210// The second LUT, faceComponents, maps from a face number F (0-5) to a
211// 4-element array A that contains the following quantities for a unit vector
212// V belonging to F:
213//
214// A[0]: index of the component of V corresponding to the face u coordinate
215// A[1]: index of the component of V corresponding to the face v coordinate
216// A[2]: index of the component of V with maximum absolute value
217// A[3]: unused (padding)
218//
219// Finally, the third LUT, faceConstants, maps from a face number F (0-5) to
220// an array of doubles A that contains the following quantities for a unit
221// vector V belonging to F:
222//
223// A[0]: scaling factor (±1) that, when multiplied by the component of V with
224// index faceComponents[face][0], yields the u coordinate of V
225// A[1]: scaling factor (±1) that, when multiplied by the component of V with
226// index faceComponents[face][1], yields the v coordinate of V
227// A[2]: value (±1) that has the same sign as the component of V with
228// maximum absolute value
229// A[3]: unused (padding)
230
231#if defined(NO_SIMD) || !defined(__x86_64__)
232
233 int faceNumber(UnitVector3d const & v, std::uint8_t const (&faceNumbers)[64]) {
234 int index = ((v.x() > v.y()) << 5) +
235 ((v.x() > -v.y()) << 4) +
236 ((v.x() > v.z()) << 3) +
237 ((v.x() > -v.z()) << 2) +
238 ((v.y() > v.z()) << 1) +
239 (v.y() > -v.z());
240 return faceNumbers[index];
241 }
242
243 UnitVector3d faceToSphere(int face,
244 double u,
245 double v,
246 std::uint8_t const (&faceComponents)[6][4],
247 double const (&faceConstants)[6][4])
248 {
249 double p[3];
250 double d = u * u + v * v;
251 double n = std::sqrt(1.0 + d);
252 p[faceComponents[face][0]] = (u * faceConstants[face][0]) / n;
253 p[faceComponents[face][1]] = (v * faceConstants[face][1]) / n;
254 p[faceComponents[face][2]] = faceConstants[face][2] / n;
255 return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
256 }
257
258 std::tuple<std::int32_t, std::int32_t> faceToGrid(int level, double u, double v) {
259 double const gridScale = GRID_SCALE[level];
260 double const stMax = ST_MAX[level];
261 double s = u * gridScale + gridScale;
262 double t = v * gridScale + gridScale;
263 s = std::max(std::min(s, stMax), 0.0);
264 t = std::max(std::min(t, stMax), 0.0);
265 return std::make_tuple(static_cast<std::int32_t>(s),
266 static_cast<std::int32_t>(t));
267 }
268
269 std::tuple<double, double> gridToFace(int level, std::int32_t s, std::int32_t t) {
270 double const faceScale = FACE_SCALE[level];
271 return std::make_tuple(static_cast<double>(s) * faceScale - 1.0,
272 static_cast<double>(t) * faceScale - 1.0);
273 }
274
275 std::tuple<double, double> atanApprox(double u, double v) {
276 return std::make_tuple(
277 u * (1.3333333333333333 - 0.3333333333333333 * std::fabs(u)),
278 v * (1.3333333333333333 - 0.3333333333333333 * std::fabs(v))
279 );
280 }
281
282 std::tuple<double, double> atanApproxInverse(double u, double v) {
283 return std::make_tuple(
284 std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(u)), u),
285 std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(v)), v)
286 );
287 }
288
289#else
290
291 int faceNumber(UnitVector3d const & v, std::uint8_t const (&faceNumbers)[64]) {
292 __m128d const m00 = _mm_set_pd(0.0, -0.0);
293 __m128d xx = _mm_set1_pd(v.x());
294 __m128d yy = _mm_set1_pd(v.y());
295 __m128d zz = _mm_set1_pd(v.z());
296 __m128d myy = _mm_xor_pd(yy, m00);
297 __m128d mzz = _mm_xor_pd(zz, m00);
298 int index = (_mm_movemask_pd(_mm_cmpgt_pd(xx, myy)) << 4) +
299 (_mm_movemask_pd(_mm_cmpgt_pd(xx, mzz)) << 2) +
300 _mm_movemask_pd(_mm_cmpgt_pd(yy, mzz));
301 return faceNumbers[index];
302 }
303
304 UnitVector3d faceToSphere(int face,
305 __m128d uv,
306 std::uint8_t const (&faceComponents)[6][4],
307 double const (&faceConstants)[6][4])
308 {
309 double p[3];
310 __m128d norm = _mm_mul_pd(uv, uv);
311 norm = _mm_sqrt_sd(
312 _mm_setzero_pd(),
313 _mm_add_sd(
314 _mm_set_sd(1.0),
315 _mm_add_sd(norm, _mm_unpackhi_pd(norm, _mm_setzero_pd()))
316 )
317 );
318 __m128d w = _mm_set_sd(faceConstants[face][2]);
319 w = _mm_div_sd(w, norm);
320 uv = _mm_mul_pd(
321 _mm_div_pd(uv, _mm_shuffle_pd(norm, norm, 0)),
322 _mm_load_pd(&faceConstants[face][0])
323 );
324 _mm_store_sd(&p[faceComponents[face][0]], uv);
325 _mm_storeh_pd(&p[faceComponents[face][1]], uv);
326 _mm_store_sd(&p[faceComponents[face][2]], w);
327 return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
328 }
329
330 __m128i faceToGrid(int level, __m128d uv) {
331 __m128d const gridScale = _mm_set1_pd(GRID_SCALE[level]);
332 __m128d const stMax = _mm_set1_pd(ST_MAX[level]);
333 __m128d st = _mm_add_pd(_mm_mul_pd(uv, gridScale), gridScale);
334 st = _mm_min_pd(_mm_max_pd(st, _mm_setzero_pd()), stMax);
335 return _mm_shuffle_epi32(_mm_cvttpd_epi32(st), 0x98);
336 }
337
338 __m128d gridToFace(int level, __m128i st) {
339 __m128d const faceScale = _mm_set1_pd(FACE_SCALE[level]);
340 __m128d xy = _mm_cvtepi32_pd(_mm_shuffle_epi32(st, 8));
341 return _mm_sub_pd(_mm_mul_pd(xy, faceScale), _mm_set1_pd(1.0));
342 }
343
344 __m128d atanApprox(__m128d uv) {
345 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
346 return _mm_mul_pd(
347 uv,
348 _mm_sub_pd(
349 _mm_set1_pd(1.3333333333333333),
350 _mm_mul_pd(_mm_set1_pd(0.3333333333333333), abs_uv)
351 )
352 );
353 }
354
355 __m128d atanApproxInverse(__m128d uv) {
356 __m128d signbits = _mm_and_pd(uv, _mm_set_pd(-0.0, -0.0));
357 __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
358 __m128d tmp = _mm_sub_pd(_mm_set1_pd(4.0),
359 _mm_mul_pd(_mm_set1_pd(3.0), abs_uv));
360 uv = _mm_sub_pd(_mm_set1_pd(2.0), _mm_sqrt_pd(tmp));
361 return _mm_or_pd(signbits, uv);
362 }
363
364#endif
365
366} // unnamed namespace
367}} // namespace lsst::sphgeom
368
369#endif // LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
This file declares a class for representing unit vectors in ℝ³.
static UnitVector3d fromNormalized(Vector3d const &v)
fromNormalized returns the unit vector equal to v, which is assumed to be normalized.
T copysign(T... args)
T fabs(T... args)
T make_tuple(T... args)
T max(T... args)
T min(T... args)
T sqrt(T... args)
double w
Definition CoaddPsf.cc:70