LSST Applications g070148d5b3+33e5256705,g0d53e28543+25c8b88941,g0da5cf3356+2dd1178308,g1081da9e2a+62d12e78cb,g17e5ecfddb+7e422d6136,g1c76d35bf8+ede3a706f7,g295839609d+225697d880,g2e2c1a68ba+cc1f6f037e,g2ffcdf413f+853cd4dcde,g38293774b4+62d12e78cb,g3b44f30a73+d953f1ac34,g48ccf36440+885b902d19,g4b2f1765b6+7dedbde6d2,g5320a0a9f6+0c5d6105b6,g56b687f8c9+ede3a706f7,g5c4744a4d9+ef6ac23297,g5ffd174ac0+0c5d6105b6,g6075d09f38+66af417445,g667d525e37+2ced63db88,g670421136f+2ced63db88,g71f27ac40c+2ced63db88,g774830318a+463cbe8d1f,g7876bc68e5+1d137996f1,g7985c39107+62d12e78cb,g7fdac2220c+0fd8241c05,g96f01af41f+368e6903a7,g9ca82378b8+2ced63db88,g9d27549199+ef6ac23297,gabe93b2c52+e3573e3735,gb065e2a02a+3dfbe639da,gbc3249ced9+0c5d6105b6,gbec6a3398f+0c5d6105b6,gc9534b9d65+35b9f25267,gd01420fc67+0c5d6105b6,geee7ff78d7+a14128c129,gf63283c776+ede3a706f7,gfed783d017+0c5d6105b6,w.2022.47
LSST Data Management Base Package
Loading...
Searching...
No Matches
curve.h
Go to the documentation of this file.
/*
 * LSST Data Management System
 * Copyright 2016 AURA/LSST.
 *
 * This product includes software developed by the
 * LSST Project (http://www.lsst.org/).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the LSST License Statement and
 * the GNU General Public License along with this program. If not,
 * see <https://www.lsstcorp.org/LegalNotices/>.
 */
22
23#ifndef LSST_SPHGEOM_CURVE_H_
24#define LSST_SPHGEOM_CURVE_H_
25
78
79#if !defined(NO_SIMD) && defined(__x86_64__)
80 #include <x86intrin.h>
81#endif
82#include <cstdint>
83#include <tuple>
84
85
86namespace lsst {
87namespace sphgeom {
88
/// `log2` returns the index of the most significant 1 bit of x,
/// i.e. floor(log2(x)) for non-zero x. If x is zero, 0 is returned.
inline uint8_t log2(uint64_t x) {
    // The top 6 bits of a 64-bit De Bruijn sequence shifted left by i are
    // distinct for every i in [0, 64), so they can index a table that
    // recovers i. This table inverts that mapping.
    alignas(64) static uint8_t const PERFECT_HASH_TABLE[64] = {
        0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40,
        5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
        63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
        62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
    };
    uint64_t const DE_BRUIJN_SEQUENCE = UINT64_C(0x0218a392cd3d5dbf);
    // First ensure that all bits below the MSB are set.
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);
    // Then, subtract them away, leaving only the MSB (a power of 2).
    x = x - (x >> 1);
    // Multiplication by x is now a left shift by the index i of the MSB.
    // (DE_BRUIJN_SEQUENCE * x) >> 58 is therefore a minimal perfect hash
    // function for 64-bit powers of 2 that recovers i.
    return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 58];
}
124
/// `log2` returns the index of the most significant 1 bit of x,
/// i.e. floor(log2(x)) for non-zero x. If x is zero, 0 is returned.
inline uint8_t log2(uint32_t x) {
    // See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
    alignas(32) static uint8_t const PERFECT_HASH_TABLE[32] = {
        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
    };
    uint32_t const DE_BRUIJN_SEQUENCE = UINT32_C(0x07c4acdd);
    // Set all bits below the MSB; the table is constructed for inputs of
    // the form 2^(i+1) - 1, so no isolation of the MSB is needed here.
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    // The upper 5 bits of the product form a perfect hash recovering i.
    return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 27];
}
140
147#if defined(NO_SIMD) || !defined(__x86_64__)
148 inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
149 // This is just a 64-bit extension of:
150 // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
151 uint64_t b = y;
152 uint64_t a = x;
153 b = (b | (b << 16)) & UINT64_C(0x0000ffff0000ffff);
154 a = (a | (a << 16)) & UINT64_C(0x0000ffff0000ffff);
155 b = (b | (b << 8)) & UINT64_C(0x00ff00ff00ff00ff);
156 a = (a | (a << 8)) & UINT64_C(0x00ff00ff00ff00ff);
157 b = (b | (b << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
158 a = (a | (a << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
159 b = (b | (b << 2)) & UINT64_C(0x3333333333333333);
160 a = (a | (a << 2)) & UINT64_C(0x3333333333333333);
161 b = (b | (b << 1)) & UINT64_C(0x5555555555555555);
162 a = (a | (a << 1)) & UINT64_C(0x5555555555555555);
163 return a | (b << 1);
164 }
165#else
166 inline uint64_t mortonIndex(__m128i xy) {
167 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 16)),
168 _mm_set1_epi32(0x0000ffff));
169 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 8)),
170 _mm_set1_epi32(0x00ff00ff));
171 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 4)),
172 _mm_set1_epi32(0x0f0f0f0f));
173 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 2)),
174 _mm_set1_epi32(0x33333333));
175 xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 1)),
176 _mm_set1_epi32(0x55555555));
177 __m128i y = _mm_unpackhi_epi64(xy, _mm_setzero_si128());
178 __m128i r = _mm_or_si128(xy, _mm_slli_epi64(y, 1));
179 return static_cast<uint64_t>(_mm_cvtsi128_si64(r));
180 }
181
182 inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
183 __m128i xy = _mm_set_epi64x(static_cast<int64_t>(y),
184 static_cast<int64_t>(x));
185 return mortonIndex(xy);
186 }
187#endif
188
194#if defined(NO_SIMD) || !defined(__x86_64__)
196 uint64_t x = z & UINT64_C(0x5555555555555555);
197 uint64_t y = (z >> 1) & UINT64_C(0x5555555555555555);
198 x = (x | (x >> 1)) & UINT64_C(0x3333333333333333);
199 y = (y | (y >> 1)) & UINT64_C(0x3333333333333333);
200 x = (x | (x >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
201 y = (y | (y >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
202 x = (x | (x >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
203 y = (y | (y >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
204 x = (x | (x >> 8)) & UINT64_C(0x0000ffff0000ffff);
205 y = (y | (y >> 8)) & UINT64_C(0x0000ffff0000ffff);
206 return std::make_tuple(static_cast<uint32_t>(x | (x >> 16)),
207 static_cast<uint32_t>(y | (y >> 16)));
208 }
209#else
210 inline __m128i mortonIndexInverseSimd(uint64_t z) {
211 __m128i xy = _mm_set_epi64x(static_cast<int64_t>(z >> 1),
212 static_cast<int64_t>(z));
213 xy = _mm_and_si128(xy, _mm_set1_epi32(0x55555555));
214 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 1)),
215 _mm_set1_epi32(0x33333333));
216 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 2)),
217 _mm_set1_epi32(0x0f0f0f0f));
218 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 4)),
219 _mm_set1_epi32(0x00ff00ff));
220 xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 8)),
221 _mm_set1_epi32(0x0000ffff));
222 xy = _mm_or_si128(xy, _mm_srli_epi64(xy, 16));
223 return xy;
224 }
225
227 __m128i xy = mortonIndexInverseSimd(z);
228 uint64_t r = _mm_cvtsi128_si64(_mm_shuffle_epi32(xy, 8));
229 return std::make_tuple(static_cast<uint32_t>(r & 0xffffffff),
230 static_cast<uint32_t>(r >> 32));
231 }
232#endif
233
/// `mortonToHilbert` converts the 2m-bit Morton index z to the
/// corresponding Hilbert index, processing 3 levels of the curve
/// (6 bits) per table lookup.
inline uint64_t mortonToHilbert(uint64_t z, int m) {
    // State-transition table: the lookup index combines the current
    // traversal state (high 2 bits) with 6 Morton bits; the low 6 bits
    // of each entry are the resulting Hilbert bits and the high 2 bits
    // are the next traversal state.
    alignas(64) static uint8_t const HILBERT_LUT_3[256] = {
        0x40, 0xc3, 0x01, 0x02, 0x04, 0x45, 0x87, 0x46,
        0x8e, 0x8d, 0x4f, 0xcc, 0x08, 0x49, 0x8b, 0x4a,
        0xfa, 0x3b, 0xf9, 0xb8, 0x7c, 0xff, 0x3d, 0x3e,
        0xf6, 0x37, 0xf5, 0xb4, 0xb2, 0xb1, 0x73, 0xf0,
        0x10, 0x51, 0x93, 0x52, 0xde, 0x1f, 0xdd, 0x9c,
        0x54, 0xd7, 0x15, 0x16, 0x58, 0xdb, 0x19, 0x1a,
        0x20, 0x61, 0xa3, 0x62, 0xee, 0x2f, 0xed, 0xac,
        0x64, 0xe7, 0x25, 0x26, 0x68, 0xeb, 0x29, 0x2a,
        0x00, 0x41, 0x83, 0x42, 0xce, 0x0f, 0xcd, 0x8c,
        0x44, 0xc7, 0x05, 0x06, 0x48, 0xcb, 0x09, 0x0a,
        0x50, 0xd3, 0x11, 0x12, 0x14, 0x55, 0x97, 0x56,
        0x9e, 0x9d, 0x5f, 0xdc, 0x18, 0x59, 0x9b, 0x5a,
        0xba, 0xb9, 0x7b, 0xf8, 0xb6, 0xb5, 0x77, 0xf4,
        0x3c, 0x7d, 0xbf, 0x7e, 0xf2, 0x33, 0xf1, 0xb0,
        0x60, 0xe3, 0x21, 0x22, 0x24, 0x65, 0xa7, 0x66,
        0xae, 0xad, 0x6f, 0xec, 0x28, 0x69, 0xab, 0x6a,
        0xaa, 0xa9, 0x6b, 0xe8, 0xa6, 0xa5, 0x67, 0xe4,
        0x2c, 0x6d, 0xaf, 0x6e, 0xe2, 0x23, 0xe1, 0xa0,
        0x9a, 0x99, 0x5b, 0xd8, 0x96, 0x95, 0x57, 0xd4,
        0x1c, 0x5d, 0x9f, 0x5e, 0xd2, 0x13, 0xd1, 0x90,
        0x70, 0xf3, 0x31, 0x32, 0x34, 0x75, 0xb7, 0x76,
        0xbe, 0xbd, 0x7f, 0xfc, 0x38, 0x79, 0xbb, 0x7a,
        0xca, 0x0b, 0xc9, 0x88, 0x4c, 0xcf, 0x0d, 0x0e,
        0xc6, 0x07, 0xc5, 0x84, 0x82, 0x81, 0x43, 0xc0,
        0xea, 0x2b, 0xe9, 0xa8, 0x6c, 0xef, 0x2d, 0x2e,
        0xe6, 0x27, 0xe5, 0xa4, 0xa2, 0xa1, 0x63, 0xe0,
        0x30, 0x71, 0xb3, 0x72, 0xfe, 0x3f, 0xfd, 0xbc,
        0x74, 0xf7, 0x35, 0x36, 0x78, 0xfb, 0x39, 0x3a,
        0xda, 0x1b, 0xd9, 0x98, 0x5c, 0xdf, 0x1d, 0x1e,
        0xd6, 0x17, 0xd5, 0x94, 0x92, 0x91, 0x53, 0xd0,
        0x8a, 0x89, 0x4b, 0xc8, 0x86, 0x85, 0x47, 0xc4,
        0x0c, 0x4d, 0x8f, 0x4e, 0xc2, 0x03, 0xc1, 0x80
    };
    uint64_t h = 0;
    uint64_t i = 0;
    // Consume the Morton index 6 bits (3 levels) at a time, starting
    // from the most significant end.
    for (m = 2 * m; m >= 6;) {
        m -= 6;
        uint8_t j = HILBERT_LUT_3[i | ((z >> m) & 0x3f)];
        h = (h << 6) | (j & 0x3f);
        i = j & 0xc0;
    }
    if (m != 0) {
        // m = 2 or 4: shift the leftover bits into the top of a 6-bit
        // group, look up, and keep only the corresponding Hilbert bits.
        int r = 6 - m;
        uint8_t j = HILBERT_LUT_3[i | ((z << r) & 0x3f)];
        h = (h << m) | ((j & 0x3f) >> r);
    }
    return h;
}
287
/// `hilbertToMorton` converts the 2m-bit Hilbert index h to the
/// corresponding Morton index; it is the inverse of `mortonToHilbert`,
/// again processing 3 levels of the curve (6 bits) per table lookup.
inline uint64_t hilbertToMorton(uint64_t h, int m) {
    // Inverse state-transition table: the lookup index combines the
    // current traversal state (high 2 bits) with 6 Hilbert bits; the
    // low 6 bits of each entry are the resulting Morton bits and the
    // high 2 bits are the next traversal state.
    alignas(64) static uint8_t const HILBERT_INVERSE_LUT_3[256] = {
        0x40, 0x02, 0x03, 0xc1, 0x04, 0x45, 0x47, 0x86,
        0x0c, 0x4d, 0x4f, 0x8e, 0xcb, 0x89, 0x88, 0x4a,
        0x20, 0x61, 0x63, 0xa2, 0x68, 0x2a, 0x2b, 0xe9,
        0x6c, 0x2e, 0x2f, 0xed, 0xa7, 0xe6, 0xe4, 0x25,
        0x30, 0x71, 0x73, 0xb2, 0x78, 0x3a, 0x3b, 0xf9,
        0x7c, 0x3e, 0x3f, 0xfd, 0xb7, 0xf6, 0xf4, 0x35,
        0xdf, 0x9d, 0x9c, 0x5e, 0x9b, 0xda, 0xd8, 0x19,
        0x93, 0xd2, 0xd0, 0x11, 0x54, 0x16, 0x17, 0xd5,
        0x00, 0x41, 0x43, 0x82, 0x48, 0x0a, 0x0b, 0xc9,
        0x4c, 0x0e, 0x0f, 0xcd, 0x87, 0xc6, 0xc4, 0x05,
        0x50, 0x12, 0x13, 0xd1, 0x14, 0x55, 0x57, 0x96,
        0x1c, 0x5d, 0x5f, 0x9e, 0xdb, 0x99, 0x98, 0x5a,
        0x70, 0x32, 0x33, 0xf1, 0x34, 0x75, 0x77, 0xb6,
        0x3c, 0x7d, 0x7f, 0xbe, 0xfb, 0xb9, 0xb8, 0x7a,
        0xaf, 0xee, 0xec, 0x2d, 0xe7, 0xa5, 0xa4, 0x66,
        0xe3, 0xa1, 0xa0, 0x62, 0x28, 0x69, 0x6b, 0xaa,
        0xff, 0xbd, 0xbc, 0x7e, 0xbb, 0xfa, 0xf8, 0x39,
        0xb3, 0xf2, 0xf0, 0x31, 0x74, 0x36, 0x37, 0xf5,
        0x9f, 0xde, 0xdc, 0x1d, 0xd7, 0x95, 0x94, 0x56,
        0xd3, 0x91, 0x90, 0x52, 0x18, 0x59, 0x5b, 0x9a,
        0x8f, 0xce, 0xcc, 0x0d, 0xc7, 0x85, 0x84, 0x46,
        0xc3, 0x81, 0x80, 0x42, 0x08, 0x49, 0x4b, 0x8a,
        0x60, 0x22, 0x23, 0xe1, 0x24, 0x65, 0x67, 0xa6,
        0x2c, 0x6d, 0x6f, 0xae, 0xeb, 0xa9, 0xa8, 0x6a,
        0xbf, 0xfe, 0xfc, 0x3d, 0xf7, 0xb5, 0xb4, 0x76,
        0xf3, 0xb1, 0xb0, 0x72, 0x38, 0x79, 0x7b, 0xba,
        0xef, 0xad, 0xac, 0x6e, 0xab, 0xea, 0xe8, 0x29,
        0xa3, 0xe2, 0xe0, 0x21, 0x64, 0x26, 0x27, 0xe5,
        0xcf, 0x8d, 0x8c, 0x4e, 0x8b, 0xca, 0xc8, 0x09,
        0x83, 0xc2, 0xc0, 0x01, 0x44, 0x06, 0x07, 0xc5,
        0x10, 0x51, 0x53, 0x92, 0x58, 0x1a, 0x1b, 0xd9,
        0x5c, 0x1e, 0x1f, 0xdd, 0x97, 0xd6, 0xd4, 0x15
    };
    uint64_t z = 0;
    uint64_t i = 0;
    // Consume the Hilbert index 6 bits (3 levels) at a time, starting
    // from the most significant end.
    for (m = 2 * m; m >= 6;) {
        m -= 6;
        uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h >> m) & 0x3f)];
        z = (z << 6) | (j & 0x3f);
        i = j & 0xc0;
    }
    if (m != 0) {
        // m = 2 or 4: shift the leftover bits into the top of a 6-bit
        // group, look up, and keep only the corresponding Morton bits.
        int r = 6 - m;
        uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h << r) & 0x3f)];
        z = (z << m) | ((j & 0x3f) >> r);
    }
    return z;
}
341
349inline uint64_t hilbertIndex(uint32_t x, uint32_t y, int m) {
350 return mortonToHilbert(mortonIndex(x, y), m);
351}
352
353#if !defined(NO_SIMD) && defined(__x86_64__)
354 inline uint64_t hilbertIndex(__m128i xy, int m) {
355 return mortonToHilbert(mortonIndex(xy), m);
356 }
357#endif
358
363}
364
365#if !defined(NO_SIMD) && defined(__x86_64__)
366 inline __m128i hilbertIndexInverseSimd(uint64_t h, int m) {
367 return mortonIndexInverseSimd(hilbertToMorton(h, m));
368 }
369#endif
370
371}} // namespace lsst::sphgeom
372
373#endif // LSST_SPHGEOM_CURVE_H_
double x
double z
Definition: Match.cc:44
int y
Definition: SpanSet.cc:48
int m
Definition: SpanSet.cc:48
table::Key< int > b
table::Key< int > a
T make_tuple(T... args)
std::tuple< uint32_t, uint32_t > mortonIndexInverse(uint64_t z)
mortonIndexInverse separates the even and odd bits of z.
Definition: curve.h:195
std::tuple< uint32_t, uint32_t > hilbertIndexInverse(uint64_t h, int m)
hilbertIndexInverse returns the point (x, y) with Hilbert index h, where x and y are m bit integers.
Definition: curve.h:361
uint8_t log2(uint64_t x)
Definition: curve.h:98
uint64_t hilbertIndex(uint32_t x, uint32_t y, int m)
hilbertIndex returns the index of (x, y) in a 2-D Hilbert curve.
Definition: curve.h:349
uint64_t mortonIndex(uint32_t x, uint32_t y)
mortonIndex interleaves the bits of x and y.
Definition: curve.h:148
uint64_t hilbertToMorton(uint64_t h, int m)
hilbertToMorton converts the 2m-bit Hilbert index h to the corresponding Morton index.
Definition: curve.h:290
uint64_t mortonToHilbert(uint64_t z, int m)
mortonToHilbert converts the 2m-bit Morton index z to the corresponding Hilbert index.
Definition: curve.h:236