23#ifndef LSST_SPHGEOM_CURVE_H_
24#define LSST_SPHGEOM_CURVE_H_
79#if !defined(NO_SIMD) && defined(__x86_64__)
80 #include <x86intrin.h>
98inline uint8_t
log2(uint64_t
x) {
99 alignas(64)
static uint8_t
const PERFECT_HASH_TABLE[64] = {
100 0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40,
101 5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
102 63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
103 62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
105 uint64_t
const DE_BRUIJN_SEQUENCE = UINT64_C(0x0218a392cd3d5dbf);
122 return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE *
x) >> 58];
127 alignas(32)
static uint8_t
const PERFECT_HASH_TABLE[32] = {
128 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
129 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
131 uint32_t
const DE_BRUIJN_SEQUENCE = UINT32_C(0x07c4acdd);
137 return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE *
x) >> 27];
147#if defined(NO_SIMD) || !defined(__x86_64__)
153 b = (
b | (
b << 16)) & UINT64_C(0x0000ffff0000ffff);
154 a = (
a | (
a << 16)) & UINT64_C(0x0000ffff0000ffff);
155 b = (
b | (
b << 8)) & UINT64_C(0x00ff00ff00ff00ff);
156 a = (
a | (
a << 8)) & UINT64_C(0x00ff00ff00ff00ff);
157 b = (
b | (
b << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
158 a = (
a | (
a << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
159 b = (
b | (
b << 2)) & UINT64_C(0x3333333333333333);
160 a = (
a | (
a << 2)) & UINT64_C(0x3333333333333333);
161 b = (
b | (
b << 1)) & UINT64_C(0x5555555555555555);
162 a = (
a | (
a << 1)) & UINT64_C(0x5555555555555555);
167 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 16)),
168 _mm_set1_epi32(0x0000ffff));
169 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 8)),
170 _mm_set1_epi32(0x00ff00ff));
171 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 4)),
172 _mm_set1_epi32(0x0f0f0f0f));
173 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 2)),
174 _mm_set1_epi32(0x33333333));
175 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 1)),
176 _mm_set1_epi32(0x55555555));
177 __m128i
y = _mm_unpackhi_epi64(xy, _mm_setzero_si128());
178 __m128i r = _mm_or_si128(xy, _mm_slli_epi64(
y, 1));
179 return static_cast<uint64_t
>(_mm_cvtsi128_si64(r));
183 __m128i xy = _mm_set_epi64x(
static_cast<int64_t
>(
y),
184 static_cast<int64_t
>(
x));
194#if defined(NO_SIMD) || !defined(__x86_64__)
196 uint64_t
x =
z & UINT64_C(0x5555555555555555);
197 uint64_t
y = (
z >> 1) & UINT64_C(0x5555555555555555);
198 x = (
x | (
x >> 1)) & UINT64_C(0x3333333333333333);
199 y = (
y | (
y >> 1)) & UINT64_C(0x3333333333333333);
200 x = (
x | (
x >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
201 y = (
y | (
y >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
202 x = (
x | (
x >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
203 y = (
y | (
y >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
204 x = (
x | (
x >> 8)) & UINT64_C(0x0000ffff0000ffff);
205 y = (
y | (
y >> 8)) & UINT64_C(0x0000ffff0000ffff);
207 static_cast<uint32_t
>(
y | (
y >> 16)));
210 inline __m128i mortonIndexInverseSimd(uint64_t
z) {
211 __m128i xy = _mm_set_epi64x(
static_cast<int64_t
>(
z >> 1),
212 static_cast<int64_t
>(
z));
213 xy = _mm_and_si128(xy, _mm_set1_epi32(0x55555555));
214 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 1)),
215 _mm_set1_epi32(0x33333333));
216 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 2)),
217 _mm_set1_epi32(0x0f0f0f0f));
218 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 4)),
219 _mm_set1_epi32(0x00ff00ff));
220 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 8)),
221 _mm_set1_epi32(0x0000ffff));
222 xy = _mm_or_si128(xy, _mm_srli_epi64(xy, 16));
227 __m128i xy = mortonIndexInverseSimd(
z);
228 uint64_t r = _mm_cvtsi128_si64(_mm_shuffle_epi32(xy, 8));
230 static_cast<uint32_t
>(r >> 32));
237 alignas(64)
static uint8_t
const HILBERT_LUT_3[256] = {
238 0x40, 0xc3, 0x01, 0x02, 0x04, 0x45, 0x87, 0x46,
239 0x8e, 0x8d, 0x4f, 0xcc, 0x08, 0x49, 0x8b, 0x4a,
240 0xfa, 0x3b, 0xf9, 0xb8, 0x7c, 0xff, 0x3d, 0x3e,
241 0xf6, 0x37, 0xf5, 0xb4, 0xb2, 0xb1, 0x73, 0xf0,
242 0x10, 0x51, 0x93, 0x52, 0xde, 0x1f, 0xdd, 0x9c,
243 0x54, 0xd7, 0x15, 0x16, 0x58, 0xdb, 0x19, 0x1a,
244 0x20, 0x61, 0xa3, 0x62, 0xee, 0x2f, 0xed, 0xac,
245 0x64, 0xe7, 0x25, 0x26, 0x68, 0xeb, 0x29, 0x2a,
246 0x00, 0x41, 0x83, 0x42, 0xce, 0x0f, 0xcd, 0x8c,
247 0x44, 0xc7, 0x05, 0x06, 0x48, 0xcb, 0x09, 0x0a,
248 0x50, 0xd3, 0x11, 0x12, 0x14, 0x55, 0x97, 0x56,
249 0x9e, 0x9d, 0x5f, 0xdc, 0x18, 0x59, 0x9b, 0x5a,
250 0xba, 0xb9, 0x7b, 0xf8, 0xb6, 0xb5, 0x77, 0xf4,
251 0x3c, 0x7d, 0xbf, 0x7e, 0xf2, 0x33, 0xf1, 0xb0,
252 0x60, 0xe3, 0x21, 0x22, 0x24, 0x65, 0xa7, 0x66,
253 0xae, 0xad, 0x6f, 0xec, 0x28, 0x69, 0xab, 0x6a,
254 0xaa, 0xa9, 0x6b, 0xe8, 0xa6, 0xa5, 0x67, 0xe4,
255 0x2c, 0x6d, 0xaf, 0x6e, 0xe2, 0x23, 0xe1, 0xa0,
256 0x9a, 0x99, 0x5b, 0xd8, 0x96, 0x95, 0x57, 0xd4,
257 0x1c, 0x5d, 0x9f, 0x5e, 0xd2, 0x13, 0xd1, 0x90,
258 0x70, 0xf3, 0x31, 0x32, 0x34, 0x75, 0xb7, 0x76,
259 0xbe, 0xbd, 0x7f, 0xfc, 0x38, 0x79, 0xbb, 0x7a,
260 0xca, 0x0b, 0xc9, 0x88, 0x4c, 0xcf, 0x0d, 0x0e,
261 0xc6, 0x07, 0xc5, 0x84, 0x82, 0x81, 0x43, 0xc0,
262 0xea, 0x2b, 0xe9, 0xa8, 0x6c, 0xef, 0x2d, 0x2e,
263 0xe6, 0x27, 0xe5, 0xa4, 0xa2, 0xa1, 0x63, 0xe0,
264 0x30, 0x71, 0xb3, 0x72, 0xfe, 0x3f, 0xfd, 0xbc,
265 0x74, 0xf7, 0x35, 0x36, 0x78, 0xfb, 0x39, 0x3a,
266 0xda, 0x1b, 0xd9, 0x98, 0x5c, 0xdf, 0x1d, 0x1e,
267 0xd6, 0x17, 0xd5, 0x94, 0x92, 0x91, 0x53, 0xd0,
268 0x8a, 0x89, 0x4b, 0xc8, 0x86, 0x85, 0x47, 0xc4,
269 0x0c, 0x4d, 0x8f, 0x4e, 0xc2, 0x03, 0xc1, 0x80
273 for (
m = 2 *
m;
m >= 6;) {
275 uint8_t j = HILBERT_LUT_3[i | ((
z >>
m) & 0x3f)];
276 h = (h << 6) | (j & 0x3f);
282 uint8_t j = HILBERT_LUT_3[i | ((
z << r) & 0x3f)];
283 h = (h <<
m) | ((j & 0x3f) >> r);
291 alignas(64)
static uint8_t
const HILBERT_INVERSE_LUT_3[256] = {
292 0x40, 0x02, 0x03, 0xc1, 0x04, 0x45, 0x47, 0x86,
293 0x0c, 0x4d, 0x4f, 0x8e, 0xcb, 0x89, 0x88, 0x4a,
294 0x20, 0x61, 0x63, 0xa2, 0x68, 0x2a, 0x2b, 0xe9,
295 0x6c, 0x2e, 0x2f, 0xed, 0xa7, 0xe6, 0xe4, 0x25,
296 0x30, 0x71, 0x73, 0xb2, 0x78, 0x3a, 0x3b, 0xf9,
297 0x7c, 0x3e, 0x3f, 0xfd, 0xb7, 0xf6, 0xf4, 0x35,
298 0xdf, 0x9d, 0x9c, 0x5e, 0x9b, 0xda, 0xd8, 0x19,
299 0x93, 0xd2, 0xd0, 0x11, 0x54, 0x16, 0x17, 0xd5,
300 0x00, 0x41, 0x43, 0x82, 0x48, 0x0a, 0x0b, 0xc9,
301 0x4c, 0x0e, 0x0f, 0xcd, 0x87, 0xc6, 0xc4, 0x05,
302 0x50, 0x12, 0x13, 0xd1, 0x14, 0x55, 0x57, 0x96,
303 0x1c, 0x5d, 0x5f, 0x9e, 0xdb, 0x99, 0x98, 0x5a,
304 0x70, 0x32, 0x33, 0xf1, 0x34, 0x75, 0x77, 0xb6,
305 0x3c, 0x7d, 0x7f, 0xbe, 0xfb, 0xb9, 0xb8, 0x7a,
306 0xaf, 0xee, 0xec, 0x2d, 0xe7, 0xa5, 0xa4, 0x66,
307 0xe3, 0xa1, 0xa0, 0x62, 0x28, 0x69, 0x6b, 0xaa,
308 0xff, 0xbd, 0xbc, 0x7e, 0xbb, 0xfa, 0xf8, 0x39,
309 0xb3, 0xf2, 0xf0, 0x31, 0x74, 0x36, 0x37, 0xf5,
310 0x9f, 0xde, 0xdc, 0x1d, 0xd7, 0x95, 0x94, 0x56,
311 0xd3, 0x91, 0x90, 0x52, 0x18, 0x59, 0x5b, 0x9a,
312 0x8f, 0xce, 0xcc, 0x0d, 0xc7, 0x85, 0x84, 0x46,
313 0xc3, 0x81, 0x80, 0x42, 0x08, 0x49, 0x4b, 0x8a,
314 0x60, 0x22, 0x23, 0xe1, 0x24, 0x65, 0x67, 0xa6,
315 0x2c, 0x6d, 0x6f, 0xae, 0xeb, 0xa9, 0xa8, 0x6a,
316 0xbf, 0xfe, 0xfc, 0x3d, 0xf7, 0xb5, 0xb4, 0x76,
317 0xf3, 0xb1, 0xb0, 0x72, 0x38, 0x79, 0x7b, 0xba,
318 0xef, 0xad, 0xac, 0x6e, 0xab, 0xea, 0xe8, 0x29,
319 0xa3, 0xe2, 0xe0, 0x21, 0x64, 0x26, 0x27, 0xe5,
320 0xcf, 0x8d, 0x8c, 0x4e, 0x8b, 0xca, 0xc8, 0x09,
321 0x83, 0xc2, 0xc0, 0x01, 0x44, 0x06, 0x07, 0xc5,
322 0x10, 0x51, 0x53, 0x92, 0x58, 0x1a, 0x1b, 0xd9,
323 0x5c, 0x1e, 0x1f, 0xdd, 0x97, 0xd6, 0xd4, 0x15
327 for (
m = 2 *
m;
m >= 6;) {
329 uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h >>
m) & 0x3f)];
330 z = (
z << 6) | (j & 0x3f);
336 uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h << r) & 0x3f)];
337 z = (
z <<
m) | ((j & 0x3f) >> r);
353#if !defined(NO_SIMD) && defined(__x86_64__)
365#if !defined(NO_SIMD) && defined(__x86_64__)
366 inline __m128i hilbertIndexInverseSimd(uint64_t h,
int m) {
std::tuple< uint32_t, uint32_t > mortonIndexInverse(uint64_t z)
mortonIndexInverse separates the even and odd bits of z.
std::tuple< uint32_t, uint32_t > hilbertIndexInverse(uint64_t h, int m)
hilbertIndexInverse returns the point (x, y) with Hilbert index h, where x and y are m bit integers.
uint64_t hilbertIndex(uint32_t x, uint32_t y, int m)
hilbertIndex returns the index of (x, y) in a 2-D Hilbert curve.
uint64_t mortonIndex(uint32_t x, uint32_t y)
mortonIndex interleaves the bits of x and y.
uint64_t hilbertToMorton(uint64_t h, int m)
hilbertToMorton converts the 2m-bit Hilbert index h to the corresponding Morton index.
uint64_t mortonToHilbert(uint64_t z, int m)
mortonToHilbert converts the 2m-bit Morton index z to the corresponding Hilbert index.