LSST Applications  21.0.0-172-gfb10e10a+18fedfabac,22.0.0+297cba6710,22.0.0+80564b0ff1,22.0.0+8d77f4f51a,22.0.0+a28f4c53b1,22.0.0+dcf3732eb2,22.0.1-1-g7d6de66+2a20fdde0d,22.0.1-1-g8e32f31+297cba6710,22.0.1-1-geca5380+7fa3b7d9b6,22.0.1-12-g44dc1dc+2a20fdde0d,22.0.1-15-g6a90155+515f58c32b,22.0.1-16-g9282f48+790f5f2caa,22.0.1-2-g92698f7+dcf3732eb2,22.0.1-2-ga9b0f51+7fa3b7d9b6,22.0.1-2-gd1925c9+bf4f0e694f,22.0.1-24-g1ad7a390+a9625a72a8,22.0.1-25-g5bf6245+3ad8ecd50b,22.0.1-25-gb120d7b+8b5510f75f,22.0.1-27-g97737f7+2a20fdde0d,22.0.1-32-gf62ce7b1+aa4237961e,22.0.1-4-g0b3f228+2a20fdde0d,22.0.1-4-g243d05b+871c1b8305,22.0.1-4-g3a563be+32dcf1063f,22.0.1-4-g44f2e3d+9e4ab0f4fa,22.0.1-42-gca6935d93+ba5e5ca3eb,22.0.1-5-g15c806e+85460ae5f3,22.0.1-5-g58711c4+611d128589,22.0.1-5-g75bb458+99c117b92f,22.0.1-6-g1c63a23+7fa3b7d9b6,22.0.1-6-g50866e6+84ff5a128b,22.0.1-6-g8d3140d+720564cf76,22.0.1-6-gd805d02+cc5644f571,22.0.1-8-ge5750ce+85460ae5f3,master-g6e05de7fdc+babf819c66,master-g99da0e417a+8d77f4f51a,w.2021.48
LSST Data Management Base Package
curve.h
Go to the documentation of this file.
1 /*
2  * LSST Data Management System
3  * Copyright 2016 AURA/LSST.
4  *
5  * This product includes software developed by the
6  * LSST Project (http://www.lsst.org/).
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the LSST License Statement and
19  * the GNU General Public License along with this program. If not,
20  * see <https://www.lsstcorp.org/LegalNotices/>.
21  */
22 
23 #ifndef LSST_SPHGEOM_CURVE_H_
24 #define LSST_SPHGEOM_CURVE_H_
25 
78 
79 #if !defined(NO_SIMD) && defined(__x86_64__)
80  #include <x86intrin.h>
81 #endif
82 #include <cstdint>
83 #include <tuple>
84 
85 
86 namespace lsst {
87 namespace sphgeom {
88 
98 inline uint8_t log2(uint64_t x) {
99  alignas(64) static uint8_t const PERFECT_HASH_TABLE[64] = {
100  0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40,
101  5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
102  63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
103  62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
104  };
105  uint64_t const DE_BRUIJN_SEQUENCE = UINT64_C(0x0218a392cd3d5dbf);
106  // First ensure that all bits below the MSB are set.
107  x |= (x >> 1);
108  x |= (x >> 2);
109  x |= (x >> 4);
110  x |= (x >> 8);
111  x |= (x >> 16);
112  x |= (x >> 32);
113  // Then, subtract them away.
114  x = x - (x >> 1);
115  // Multiplication by x is now a shift by the index i of the MSB.
116  //
117  // By definition, the value of the upper 6 bits of a 64-bit De Bruijn
118  // sequence left shifted by i is different for every value of i in
119  // [0, 64). It can therefore be used as an an index into a lookup table
120  // that recovers i. In other words, (DE_BRUIJN_SEQUENCE * x) >> 58 is a
121  // minimal perfect hash function for 64 bit powers of 2.
122  return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 58];
123 }
124 
125 inline uint8_t log2(uint32_t x) {
126  // See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
127  alignas(32) static uint8_t const PERFECT_HASH_TABLE[32] = {
128  0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
129  8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
130  };
131  uint32_t const DE_BRUIJN_SEQUENCE = UINT32_C(0x07c4acdd);
132  x |= (x >> 1);
133  x |= (x >> 2);
134  x |= (x >> 4);
135  x |= (x >> 8);
136  x |= (x >> 16);
137  return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 27];
138 }
140 
147 #if defined(NO_SIMD) || !defined(__x86_64__)
148  inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
149  // This is just a 64-bit extension of:
150  // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
151  uint64_t b = y;
152  uint64_t a = x;
153  b = (b | (b << 16)) & UINT64_C(0x0000ffff0000ffff);
154  a = (a | (a << 16)) & UINT64_C(0x0000ffff0000ffff);
155  b = (b | (b << 8)) & UINT64_C(0x00ff00ff00ff00ff);
156  a = (a | (a << 8)) & UINT64_C(0x00ff00ff00ff00ff);
157  b = (b | (b << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
158  a = (a | (a << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
159  b = (b | (b << 2)) & UINT64_C(0x3333333333333333);
160  a = (a | (a << 2)) & UINT64_C(0x3333333333333333);
161  b = (b | (b << 1)) & UINT64_C(0x5555555555555555);
162  a = (a | (a << 1)) & UINT64_C(0x5555555555555555);
163  return a | (b << 1);
164  }
165 #else
166  inline uint64_t mortonIndex(__m128i xy) {
167  xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 16)),
168  _mm_set1_epi32(0x0000ffff));
169  xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 8)),
170  _mm_set1_epi32(0x00ff00ff));
171  xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 4)),
172  _mm_set1_epi32(0x0f0f0f0f));
173  xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 2)),
174  _mm_set1_epi32(0x33333333));
175  xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 1)),
176  _mm_set1_epi32(0x55555555));
177  __m128i y = _mm_unpackhi_epi64(xy, _mm_setzero_si128());
178  __m128i r = _mm_or_si128(xy, _mm_slli_epi64(y, 1));
179  return static_cast<uint64_t>(_mm_cvtsi128_si64(r));
180  }
181 
182  inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
183  __m128i xy = _mm_set_epi64x(static_cast<int64_t>(y),
184  static_cast<int64_t>(x));
185  return mortonIndex(xy);
186  }
187 #endif
188 
194 #if defined(NO_SIMD) || !defined(__x86_64__)
196  uint64_t x = z & UINT64_C(0x5555555555555555);
197  uint64_t y = (z >> 1) & UINT64_C(0x5555555555555555);
198  x = (x | (x >> 1)) & UINT64_C(0x3333333333333333);
199  y = (y | (y >> 1)) & UINT64_C(0x3333333333333333);
200  x = (x | (x >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
201  y = (y | (y >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
202  x = (x | (x >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
203  y = (y | (y >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
204  x = (x | (x >> 8)) & UINT64_C(0x0000ffff0000ffff);
205  y = (y | (y >> 8)) & UINT64_C(0x0000ffff0000ffff);
206  return std::make_tuple(static_cast<uint32_t>(x | (x >> 16)),
207  static_cast<uint32_t>(y | (y >> 16)));
208  }
209 #else
210  inline __m128i mortonIndexInverseSimd(uint64_t z) {
211  __m128i xy = _mm_set_epi64x(static_cast<int64_t>(z >> 1),
212  static_cast<int64_t>(z));
213  xy = _mm_and_si128(xy, _mm_set1_epi32(0x55555555));
214  xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 1)),
215  _mm_set1_epi32(0x33333333));
216  xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 2)),
217  _mm_set1_epi32(0x0f0f0f0f));
218  xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 4)),
219  _mm_set1_epi32(0x00ff00ff));
220  xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 8)),
221  _mm_set1_epi32(0x0000ffff));
222  xy = _mm_or_si128(xy, _mm_srli_epi64(xy, 16));
223  return xy;
224  }
225 
227  __m128i xy = mortonIndexInverseSimd(z);
228  uint64_t r = _mm_cvtsi128_si64(_mm_shuffle_epi32(xy, 8));
229  return std::make_tuple(static_cast<uint32_t>(r & 0xffffffff),
230  static_cast<uint32_t>(r >> 32));
231  }
232 #endif
233 
236 inline uint64_t mortonToHilbert(uint64_t z, int m) {
237  alignas(64) static uint8_t const HILBERT_LUT_3[256] = {
238  0x40, 0xc3, 0x01, 0x02, 0x04, 0x45, 0x87, 0x46,
239  0x8e, 0x8d, 0x4f, 0xcc, 0x08, 0x49, 0x8b, 0x4a,
240  0xfa, 0x3b, 0xf9, 0xb8, 0x7c, 0xff, 0x3d, 0x3e,
241  0xf6, 0x37, 0xf5, 0xb4, 0xb2, 0xb1, 0x73, 0xf0,
242  0x10, 0x51, 0x93, 0x52, 0xde, 0x1f, 0xdd, 0x9c,
243  0x54, 0xd7, 0x15, 0x16, 0x58, 0xdb, 0x19, 0x1a,
244  0x20, 0x61, 0xa3, 0x62, 0xee, 0x2f, 0xed, 0xac,
245  0x64, 0xe7, 0x25, 0x26, 0x68, 0xeb, 0x29, 0x2a,
246  0x00, 0x41, 0x83, 0x42, 0xce, 0x0f, 0xcd, 0x8c,
247  0x44, 0xc7, 0x05, 0x06, 0x48, 0xcb, 0x09, 0x0a,
248  0x50, 0xd3, 0x11, 0x12, 0x14, 0x55, 0x97, 0x56,
249  0x9e, 0x9d, 0x5f, 0xdc, 0x18, 0x59, 0x9b, 0x5a,
250  0xba, 0xb9, 0x7b, 0xf8, 0xb6, 0xb5, 0x77, 0xf4,
251  0x3c, 0x7d, 0xbf, 0x7e, 0xf2, 0x33, 0xf1, 0xb0,
252  0x60, 0xe3, 0x21, 0x22, 0x24, 0x65, 0xa7, 0x66,
253  0xae, 0xad, 0x6f, 0xec, 0x28, 0x69, 0xab, 0x6a,
254  0xaa, 0xa9, 0x6b, 0xe8, 0xa6, 0xa5, 0x67, 0xe4,
255  0x2c, 0x6d, 0xaf, 0x6e, 0xe2, 0x23, 0xe1, 0xa0,
256  0x9a, 0x99, 0x5b, 0xd8, 0x96, 0x95, 0x57, 0xd4,
257  0x1c, 0x5d, 0x9f, 0x5e, 0xd2, 0x13, 0xd1, 0x90,
258  0x70, 0xf3, 0x31, 0x32, 0x34, 0x75, 0xb7, 0x76,
259  0xbe, 0xbd, 0x7f, 0xfc, 0x38, 0x79, 0xbb, 0x7a,
260  0xca, 0x0b, 0xc9, 0x88, 0x4c, 0xcf, 0x0d, 0x0e,
261  0xc6, 0x07, 0xc5, 0x84, 0x82, 0x81, 0x43, 0xc0,
262  0xea, 0x2b, 0xe9, 0xa8, 0x6c, 0xef, 0x2d, 0x2e,
263  0xe6, 0x27, 0xe5, 0xa4, 0xa2, 0xa1, 0x63, 0xe0,
264  0x30, 0x71, 0xb3, 0x72, 0xfe, 0x3f, 0xfd, 0xbc,
265  0x74, 0xf7, 0x35, 0x36, 0x78, 0xfb, 0x39, 0x3a,
266  0xda, 0x1b, 0xd9, 0x98, 0x5c, 0xdf, 0x1d, 0x1e,
267  0xd6, 0x17, 0xd5, 0x94, 0x92, 0x91, 0x53, 0xd0,
268  0x8a, 0x89, 0x4b, 0xc8, 0x86, 0x85, 0x47, 0xc4,
269  0x0c, 0x4d, 0x8f, 0x4e, 0xc2, 0x03, 0xc1, 0x80
270  };
271  uint64_t h = 0;
272  uint64_t i = 0;
273  for (m = 2 * m; m >= 6;) {
274  m -= 6;
275  uint8_t j = HILBERT_LUT_3[i | ((z >> m) & 0x3f)];
276  h = (h << 6) | (j & 0x3f);
277  i = j & 0xc0;
278  }
279  if (m != 0) {
280  // m = 2 or 4
281  int r = 6 - m;
282  uint8_t j = HILBERT_LUT_3[i | ((z << r) & 0x3f)];
283  h = (h << m) | ((j & 0x3f) >> r);
284  }
285  return h;
286 }
287 
290 inline uint64_t hilbertToMorton(uint64_t h, int m) {
291  alignas(64) static uint8_t const HILBERT_INVERSE_LUT_3[256] = {
292  0x40, 0x02, 0x03, 0xc1, 0x04, 0x45, 0x47, 0x86,
293  0x0c, 0x4d, 0x4f, 0x8e, 0xcb, 0x89, 0x88, 0x4a,
294  0x20, 0x61, 0x63, 0xa2, 0x68, 0x2a, 0x2b, 0xe9,
295  0x6c, 0x2e, 0x2f, 0xed, 0xa7, 0xe6, 0xe4, 0x25,
296  0x30, 0x71, 0x73, 0xb2, 0x78, 0x3a, 0x3b, 0xf9,
297  0x7c, 0x3e, 0x3f, 0xfd, 0xb7, 0xf6, 0xf4, 0x35,
298  0xdf, 0x9d, 0x9c, 0x5e, 0x9b, 0xda, 0xd8, 0x19,
299  0x93, 0xd2, 0xd0, 0x11, 0x54, 0x16, 0x17, 0xd5,
300  0x00, 0x41, 0x43, 0x82, 0x48, 0x0a, 0x0b, 0xc9,
301  0x4c, 0x0e, 0x0f, 0xcd, 0x87, 0xc6, 0xc4, 0x05,
302  0x50, 0x12, 0x13, 0xd1, 0x14, 0x55, 0x57, 0x96,
303  0x1c, 0x5d, 0x5f, 0x9e, 0xdb, 0x99, 0x98, 0x5a,
304  0x70, 0x32, 0x33, 0xf1, 0x34, 0x75, 0x77, 0xb6,
305  0x3c, 0x7d, 0x7f, 0xbe, 0xfb, 0xb9, 0xb8, 0x7a,
306  0xaf, 0xee, 0xec, 0x2d, 0xe7, 0xa5, 0xa4, 0x66,
307  0xe3, 0xa1, 0xa0, 0x62, 0x28, 0x69, 0x6b, 0xaa,
308  0xff, 0xbd, 0xbc, 0x7e, 0xbb, 0xfa, 0xf8, 0x39,
309  0xb3, 0xf2, 0xf0, 0x31, 0x74, 0x36, 0x37, 0xf5,
310  0x9f, 0xde, 0xdc, 0x1d, 0xd7, 0x95, 0x94, 0x56,
311  0xd3, 0x91, 0x90, 0x52, 0x18, 0x59, 0x5b, 0x9a,
312  0x8f, 0xce, 0xcc, 0x0d, 0xc7, 0x85, 0x84, 0x46,
313  0xc3, 0x81, 0x80, 0x42, 0x08, 0x49, 0x4b, 0x8a,
314  0x60, 0x22, 0x23, 0xe1, 0x24, 0x65, 0x67, 0xa6,
315  0x2c, 0x6d, 0x6f, 0xae, 0xeb, 0xa9, 0xa8, 0x6a,
316  0xbf, 0xfe, 0xfc, 0x3d, 0xf7, 0xb5, 0xb4, 0x76,
317  0xf3, 0xb1, 0xb0, 0x72, 0x38, 0x79, 0x7b, 0xba,
318  0xef, 0xad, 0xac, 0x6e, 0xab, 0xea, 0xe8, 0x29,
319  0xa3, 0xe2, 0xe0, 0x21, 0x64, 0x26, 0x27, 0xe5,
320  0xcf, 0x8d, 0x8c, 0x4e, 0x8b, 0xca, 0xc8, 0x09,
321  0x83, 0xc2, 0xc0, 0x01, 0x44, 0x06, 0x07, 0xc5,
322  0x10, 0x51, 0x53, 0x92, 0x58, 0x1a, 0x1b, 0xd9,
323  0x5c, 0x1e, 0x1f, 0xdd, 0x97, 0xd6, 0xd4, 0x15
324  };
325  uint64_t z = 0;
326  uint64_t i = 0;
327  for (m = 2 * m; m >= 6;) {
328  m -= 6;
329  uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h >> m) & 0x3f)];
330  z = (z << 6) | (j & 0x3f);
331  i = j & 0xc0;
332  }
333  if (m != 0) {
334  // m = 2 or 4
335  int r = 6 - m;
336  uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h << r) & 0x3f)];
337  z = (z << m) | ((j & 0x3f) >> r);
338  }
339  return z;
340 }
341 
349 inline uint64_t hilbertIndex(uint32_t x, uint32_t y, int m) {
350  return mortonToHilbert(mortonIndex(x, y), m);
351 }
352 
353 #if !defined(NO_SIMD) && defined(__x86_64__)
354  inline uint64_t hilbertIndex(__m128i xy, int m) {
355  return mortonToHilbert(mortonIndex(xy), m);
356  }
357 #endif
358 
363 }
364 
365 #if !defined(NO_SIMD) && defined(__x86_64__)
366  inline __m128i hilbertIndexInverseSimd(uint64_t h, int m) {
367  return mortonIndexInverseSimd(hilbertToMorton(h, m));
368  }
369 #endif
370 
371 }} // namespace lsst::sphgeom
372 
373 #endif // LSST_SPHGEOM_CURVE_H_
double x
double z
Definition: Match.cc:44
int y
Definition: SpanSet.cc:48
int m
Definition: SpanSet.cc:48
table::Key< int > b
table::Key< int > a
T make_tuple(T... args)
std::tuple< uint32_t, uint32_t > hilbertIndexInverse(uint64_t h, int m)
hilbertIndexInverse returns the point (x, y) with Hilbert index h, where x and y are m bit integers.
Definition: curve.h:361
uint8_t log2(uint64_t x)
Definition: curve.h:98
uint64_t hilbertIndex(uint32_t x, uint32_t y, int m)
hilbertIndex returns the index of (x, y) in a 2-D Hilbert curve.
Definition: curve.h:349
uint64_t mortonIndex(uint32_t x, uint32_t y)
mortonIndex interleaves the bits of x and y.
Definition: curve.h:148
uint64_t hilbertToMorton(uint64_t h, int m)
hilbertToMorton converts the 2m-bit Hilbert index h to the corresponding Morton index.
Definition: curve.h:290
uint64_t mortonToHilbert(uint64_t z, int m)
mortonToHilbert converts the 2m-bit Morton index z to the corresponding Hilbert index.
Definition: curve.h:236
std::tuple< uint32_t, uint32_t > mortonIndexInverse(uint64_t z)
mortonIndexInverse separates the even and odd bits of z.
Definition: curve.h:195
A base class for image defects.