LSSTApplications  17.0+11,17.0+34,17.0+56,17.0+57,17.0+59,17.0+7,17.0-1-g377950a+33,17.0.1-1-g114240f+2,17.0.1-1-g4d4fbc4+28,17.0.1-1-g55520dc+49,17.0.1-1-g5f4ed7e+52,17.0.1-1-g6dd7d69+17,17.0.1-1-g8de6c91+11,17.0.1-1-gb9095d2+7,17.0.1-1-ge9fec5e+5,17.0.1-1-gf4e0155+55,17.0.1-1-gfc65f5f+50,17.0.1-1-gfc6fb1f+20,17.0.1-10-g87f9f3f+1,17.0.1-11-ge9de802+16,17.0.1-16-ga14f7d5c+4,17.0.1-17-gc79d625+1,17.0.1-17-gdae4c4a+8,17.0.1-2-g26618f5+29,17.0.1-2-g54f2ebc+9,17.0.1-2-gf403422+1,17.0.1-20-g2ca2f74+6,17.0.1-23-gf3eadeb7+1,17.0.1-3-g7e86b59+39,17.0.1-3-gb5ca14a,17.0.1-3-gd08d533+40,17.0.1-30-g596af8797,17.0.1-4-g59d126d+4,17.0.1-4-gc69c472+5,17.0.1-6-g5afd9b9+4,17.0.1-7-g35889ee+1,17.0.1-7-gc7c8782+18,17.0.1-9-gc4bbfb2+3,w.2019.22
LSSTDataManagementBasePackage
Q3cPixelizationImpl.h
Go to the documentation of this file.
1 /*
2  * LSST Data Management System
3  * Copyright 2016 AURA/LSST.
4  *
5  * This product includes software developed by the
6  * LSST Project (http://www.lsst.org/).
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the LSST License Statement and
19  * the GNU General Public License along with this program. If not,
20  * see <https://www.lsstcorp.org/LegalNotices/>.
21  */
22 
23 #ifndef LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
24 #define LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
25 
29 
30 #include <cstdint>
31 #if defined(NO_SIMD) || !defined(__x86_64__)
32  #include <tuple>
33 #else
34  #include <x86intrin.h>
35 #endif
36 
38 
39 
40 namespace lsst {
41 namespace sphgeom {
42 namespace {
43 
44 // LUT that provides the maximum grid coordinate value M in terms of
45 // the subdivision level L; M = 2^L - 1.
46 double const ST_MAX[31] = {
47  0.0,
48  1.0,
49  3.0,
50  7.0,
51  15.0,
52  31.0,
53  63.0,
54  127.0,
55  255.0,
56  511.0,
57  1023.0,
58  2047.0,
59  4095.0,
60  8191.0,
61  16383.0,
62  32767.0,
63  65535.0,
64  131071.0,
65  262143.0,
66  524287.0,
67  1048575.0,
68  2097151.0,
69  4194303.0,
70  8388607.0,
71  16777215.0,
72  33554431.0,
73  67108863.0,
74  134217727.0,
75  268435455.0,
76  536870911.0,
77  1073741823.0
78 };
79 
80 // LUT that provides the face to grid coordinate scaling factor F in terms of
81 // the subdivision level L; F = 2^(L - 1).
82 double const GRID_SCALE[31] = {
83  0.5,
84  1.0,
85  2.0,
86  4.0,
87  8.0,
88  16.0,
89  32.0,
90  64.0,
91  128.0,
92  256.0,
93  512.0,
94  1024.0,
95  2048.0,
96  4096.0,
97  8192.0,
98  16384.0,
99  32768.0,
100  65536.0,
101  131072.0,
102  262144.0,
103  524288.0,
104  1048576.0,
105  2097152.0,
106  4194304.0,
107  8388608.0,
108  16777216.0,
109  33554432.0,
110  67108864.0,
111  134217728.0,
112  268435456.0,
113  536870912.0
114 };
115 
116 // LUT that provides the grid to face coordinate scaling factor F in terms of
117 // the subdivision level L; F = 2^(1 - L).
118 double const FACE_SCALE[31] = {
119  2.0,
120  1.0,
121  0.5,
122  0.25,
123  0.125,
124  0.0625,
125  0.03125,
126  0.015625,
127  0.0078125,
128  0.00390625,
129  0.001953125,
130  0.0009765625,
131  0.00048828125,
132  0.000244140625,
133  0.0001220703125,
134  6.103515625e-5,
135  3.0517578125e-5,
136  1.52587890625e-5,
137  7.62939453125e-6,
138  3.814697265625e-6,
139  1.9073486328125e-6,
140  9.5367431640625e-7,
141  4.76837158203125e-7,
142  2.384185791015625e-7,
143  1.1920928955078125e-7,
144  5.9604644775390625e-8,
145  2.98023223876953125e-8,
146  1.490116119384765625e-8,
147  7.450580596923828125e-9,
148  3.7252902984619140625e-9,
149  1.86264514923095703125e-9,
150 };
151 
152 // The functions below use a number of additional lookup tables
153 // for performance.
154 //
155 // The first LUT, faceNumbers, is used to find the cube face that a unit
156 // vector p belongs to. In particular, component comparisons are used to
157 // build an index into the LUT. That index is computed as follows:
158 //
159 // int index = ((p.x() > p.y()) << 5) +
160 // ((p.x() > -p.y()) << 4) +
161 // ((p.x() > p.z()) << 3) +
162 // ((p.x() > -p.z()) << 2) +
163 // ((p.y() > p.z()) << 1) +
164 // (p.y() > -p.z());
165 //
166 // and the Q3C (or modified-Q3C) face number is just:
167 //
168 // int face = faceNumbers[index];
169 //
170 // Compare this to something like:
171 //
172 // int face;
173 // double a;
174 // if (p.x() > -p.y()) {
175 // if (p.x() > p.y()) {
176 // face = 1;
177 // a = p.x();
178 // } else {
179 // face = 2;
180 // a = p.y();
181 // }
182 // } else {
183 // if (p.x() > p.y()) {
184 // face = 4;
185 // a = -p.y();
186 // } else {
187 // face = 3;
188 // a = -p.x();
189 // }
190 // }
191 // if (p.z() > a) {
192 // face = 0;
193 // } else if (p.z() < -a) {
194 // face = 5;
195 // }
196 //
197 // In the latter case, the comparisons performed depend on the data, and the
198 // branches involved will be hard to predict for random input. The LUT-based
199 // approach allows comparisons to be performed in parallel without speculation
200 // by introducing some redundant computation. It replaces control dependencies
201 // (branching) with data dependencies.
202 //
203 // The second LUT, faceComponents, maps from a face number F (0-5) to a
204 // 4-element array A that contains the following quantities for a unit vector
205 // V belonging to F:
206 //
207 // A[0]: index of the component of V corresponding to the face u coordinate
208 // A[1]: index of the component of V corresponding to the face v coordinate
209 // A[2]: index of the component of V with maximum absolute value
210 // A[3]: unused (padding)
211 //
212 // Finally, the third LUT, faceConstants, maps from a face number F (0-5) to
213 // an array of doubles A that contains the following quantities for a unit
214 // vector V belonging to F:
215 //
216 // A[0]: scaling factor (±1) that, when multiplied by the component of V with
217 // index faceComponents[face][0], yields the u coordinate of V
218 // A[1]: scaling factor (±1) that, when multiplied by the component of V with
219 // index faceComponents[face][1], yields the v coordinate of V
220 // A[2]: value (±1) that has the same sign as the component of V with
221 // maximum absolute value
222 // A[3]: unused (padding)
223 
224 #if defined(NO_SIMD) || !defined(__x86_64__)
225 
226  int faceNumber(UnitVector3d const & v, uint8_t const (&faceNumbers)[64]) {
227  int index = ((v.x() > v.y()) << 5) +
228  ((v.x() > -v.y()) << 4) +
229  ((v.x() > v.z()) << 3) +
230  ((v.x() > -v.z()) << 2) +
231  ((v.y() > v.z()) << 1) +
232  (v.y() > -v.z());
233  return faceNumbers[index];
234  }
235 
236  UnitVector3d faceToSphere(int face,
237  double u,
238  double v,
239  uint8_t const (&faceComponents)[6][4],
240  double const (&faceConstants)[6][4])
241  {
242  double p[3];
243  double d = u * u + v * v;
244  double n = std::sqrt(1.0 + d);
245  p[faceComponents[face][0]] = (u * faceConstants[face][0]) / n;
246  p[faceComponents[face][1]] = (v * faceConstants[face][1]) / n;
247  p[faceComponents[face][2]] = faceConstants[face][2] / n;
248  return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
249  }
250 
251  std::tuple<int32_t, int32_t> faceToGrid(int level, double u, double v) {
252  double const gridScale = GRID_SCALE[level];
253  double const stMax = ST_MAX[level];
254  double s = u * gridScale + gridScale;
255  double t = v * gridScale + gridScale;
256  s = std::max(std::min(s, stMax), 0.0);
257  t = std::max(std::min(t, stMax), 0.0);
258  return std::make_tuple(static_cast<int32_t>(s),
259  static_cast<int32_t>(t));
260  }
261 
262  std::tuple<double, double> gridToFace(int level, int32_t s, int32_t t) {
263  double const faceScale = FACE_SCALE[level];
264  return std::make_tuple(static_cast<double>(s) * faceScale - 1.0,
265  static_cast<double>(t) * faceScale - 1.0);
266  }
267 
268  std::tuple<double, double> atanApprox(double u, double v) {
269  return std::make_tuple(
270  u * (1.3333333333333333 - 0.3333333333333333 * std::fabs(u)),
271  v * (1.3333333333333333 - 0.3333333333333333 * std::fabs(v))
272  );
273  }
274 
275  std::tuple<double, double> atanApproxInverse(double u, double v) {
276  return std::make_tuple(
277  std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(u)), u),
278  std::copysign(2.0 - std::sqrt(4.0 - 3.0 * std::fabs(v)), v)
279  );
280  }
281 
282 #else
283 
284  int faceNumber(UnitVector3d const & v, uint8_t const (&faceNumbers)[64]) {
285  __m128d const m00 = _mm_set_pd(0.0, -0.0);
286  __m128d xx = _mm_set1_pd(v.x());
287  __m128d yy = _mm_set1_pd(v.y());
288  __m128d zz = _mm_set1_pd(v.z());
289  __m128d myy = _mm_xor_pd(yy, m00);
290  __m128d mzz = _mm_xor_pd(zz, m00);
291  int index = (_mm_movemask_pd(_mm_cmpgt_pd(xx, myy)) << 4) +
292  (_mm_movemask_pd(_mm_cmpgt_pd(xx, mzz)) << 2) +
293  _mm_movemask_pd(_mm_cmpgt_pd(yy, mzz));
294  return faceNumbers[index];
295  }
296 
297  UnitVector3d faceToSphere(int face,
298  __m128d uv,
299  uint8_t const (&faceComponents)[6][4],
300  double const (&faceConstants)[6][4])
301  {
302  double p[3];
303  __m128d norm = _mm_mul_pd(uv, uv);
304  norm = _mm_sqrt_sd(
305  _mm_setzero_pd(),
306  _mm_add_sd(
307  _mm_set_sd(1.0),
308  _mm_add_sd(norm, _mm_unpackhi_pd(norm, _mm_setzero_pd()))
309  )
310  );
311  __m128d w = _mm_set_sd(faceConstants[face][2]);
312  w = _mm_div_sd(w, norm);
313  uv = _mm_mul_pd(
314  _mm_div_pd(uv, _mm_shuffle_pd(norm, norm, 0)),
315  _mm_load_pd(&faceConstants[face][0])
316  );
317  _mm_store_sd(&p[faceComponents[face][0]], uv);
318  _mm_storeh_pd(&p[faceComponents[face][1]], uv);
319  _mm_store_sd(&p[faceComponents[face][2]], w);
320  return UnitVector3d::fromNormalized(p[0], p[1], p[2]);
321  }
322 
323  __m128i faceToGrid(int level, __m128d uv) {
324  __m128d const gridScale = _mm_set1_pd(GRID_SCALE[level]);
325  __m128d const stMax = _mm_set1_pd(ST_MAX[level]);
326  __m128d st = _mm_add_pd(_mm_mul_pd(uv, gridScale), gridScale);
327  st = _mm_min_pd(_mm_max_pd(st, _mm_setzero_pd()), stMax);
328  return _mm_shuffle_epi32(_mm_cvttpd_epi32(st), 0x98);
329  }
330 
331  __m128d gridToFace(int level, __m128i st) {
332  __m128d const faceScale = _mm_set1_pd(FACE_SCALE[level]);
333  __m128d xy = _mm_cvtepi32_pd(_mm_shuffle_epi32(st, 8));
334  return _mm_sub_pd(_mm_mul_pd(xy, faceScale), _mm_set1_pd(1.0));
335  }
336 
337  __m128d atanApprox(__m128d uv) {
338  __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
339  return _mm_mul_pd(
340  uv,
341  _mm_sub_pd(
342  _mm_set1_pd(1.3333333333333333),
343  _mm_mul_pd(_mm_set1_pd(0.3333333333333333), abs_uv)
344  )
345  );
346  }
347 
348  __m128d atanApproxInverse(__m128d uv) {
349  __m128d signbits = _mm_and_pd(uv, _mm_set_pd(-0.0, -0.0));
350  __m128d abs_uv = _mm_andnot_pd(_mm_set_pd(-0.0, -0.0), uv);
351  __m128d tmp = _mm_sub_pd(_mm_set1_pd(4.0),
352  _mm_mul_pd(_mm_set1_pd(3.0), abs_uv));
353  uv = _mm_sub_pd(_mm_set1_pd(2.0), _mm_sqrt_pd(tmp));
354  return _mm_or_pd(signbits, uv);
355  }
356 
357 #endif
358 
359 } // unnamed namespace
360 }} // namespace lsst::sphgeom
361 
362 #endif // LSST_SPHGEOM_Q3CPIXELIZATIONIMPL_H_
T norm(const T &x)
Definition: Integrate.h:194
T make_tuple(T... args)
T min(T... args)
This file declares a class for representing unit vectors in ℝ³.
A base class for image defects.
T copysign(T... args)
T fabs(T... args)
T max(T... args)
solver_t * s
double w
Definition: CoaddPsf.cc:69
T sqrt(T... args)
static UnitVector3d fromNormalized(Vector3d const &v)
fromNormalized returns the unit vector equal to v, which is assumed to be normalized.
Definition: UnitVector3d.h:82