curve.h
/*
 * This file is part of sphgeom.
 *
 * Developed for the LSST Data Management System.
 * This product includes software developed by the LSST Project
 * (http://www.lsst.org).
 * See the COPYRIGHT file at the top-level directory of this distribution
 * for details of code ownership.
 *
 * This software is dual licensed under the GNU General Public License and also
 * under a 3-clause BSD license. Recipients may choose which of these licenses
 * to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
 * respectively. If you choose the GPL option then the following text applies
 * (but note that there is still no warranty even if you opt for BSD instead):
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LSST_SPHGEOM_CURVE_H_
#define LSST_SPHGEOM_CURVE_H_

#if !defined(NO_SIMD) && defined(__x86_64__)
    #include <x86intrin.h>
#endif
#include <cstdint>
#include <tuple>

namespace lsst {
namespace sphgeom {

/// `log2` returns the index of the most significant 1 bit of x.
/// The result for x == 0 is 0.
inline uint8_t log2(uint64_t x) {
    alignas(64) static uint8_t const PERFECT_HASH_TABLE[64] = {
        0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40,
        5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
        63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
        62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
    };
    uint64_t const DE_BRUIJN_SEQUENCE = UINT64_C(0x0218a392cd3d5dbf);
    // First ensure that all bits below the MSB are set.
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);
    // Then, subtract them away.
    x = x - (x >> 1);
    // Multiplication by x is now a shift by the index i of the MSB.
    //
    // By definition, the value of the upper 6 bits of a 64-bit De Bruijn
    // sequence left shifted by i is different for every value of i in
    // [0, 64). It can therefore be used as an index into a lookup table
    // that recovers i. In other words, (DE_BRUIJN_SEQUENCE * x) >> 58 is a
    // minimal perfect hash function for 64 bit powers of 2.
    return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 58];
}
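
// Worked examples of the De Bruijn trick above (illustrative, derived from
// the code rather than the upstream comments): log2(1) == 0,
// log2(UINT64_C(1) << 42) == 42, and log2(6) == 2, i.e. the result is the
// floor of the base-2 logarithm; log2(0) hashes to entry 0 of the table and
// therefore also returns 0.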

inline uint8_t log2(uint32_t x) {
    // See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
    alignas(32) static uint8_t const PERFECT_HASH_TABLE[32] = {
        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
    };
    uint32_t const DE_BRUIJN_SEQUENCE = UINT32_C(0x07c4acdd);
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return PERFECT_HASH_TABLE[(DE_BRUIJN_SEQUENCE * x) >> 27];
}

/// `mortonIndex` interleaves the bits of x and y.
#if defined(NO_SIMD) || !defined(__x86_64__)
    inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
        // This is just a 64-bit extension of:
        // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
        uint64_t b = y;
        uint64_t a = x;
        b = (b | (b << 16)) & UINT64_C(0x0000ffff0000ffff);
        a = (a | (a << 16)) & UINT64_C(0x0000ffff0000ffff);
        b = (b | (b << 8)) & UINT64_C(0x00ff00ff00ff00ff);
        a = (a | (a << 8)) & UINT64_C(0x00ff00ff00ff00ff);
        b = (b | (b << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
        a = (a | (a << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
        b = (b | (b << 2)) & UINT64_C(0x3333333333333333);
        a = (a | (a << 2)) & UINT64_C(0x3333333333333333);
        b = (b | (b << 1)) & UINT64_C(0x5555555555555555);
        a = (a | (a << 1)) & UINT64_C(0x5555555555555555);
        return a | (b << 1);
    }
#else
    inline uint64_t mortonIndex(__m128i xy) {
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 16)),
                           _mm_set1_epi32(0x0000ffff));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 8)),
                           _mm_set1_epi32(0x00ff00ff));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 4)),
                           _mm_set1_epi32(0x0f0f0f0f));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 2)),
                           _mm_set1_epi32(0x33333333));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_slli_epi64(xy, 1)),
                           _mm_set1_epi32(0x55555555));
        __m128i y = _mm_unpackhi_epi64(xy, _mm_setzero_si128());
        __m128i r = _mm_or_si128(xy, _mm_slli_epi64(y, 1));
        return static_cast<uint64_t>(_mm_cvtsi128_si64(r));
    }

    inline uint64_t mortonIndex(uint32_t x, uint32_t y) {
        __m128i xy = _mm_set_epi64x(static_cast<int64_t>(y),
                                    static_cast<int64_t>(x));
        return mortonIndex(xy);
    }
#endif
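
// Worked example of the interleaving above (illustrative): mortonIndex(3, 1)
// == 7, since the bits of x land at the even result positions and the bits of
// y at the odd ones; likewise mortonIndex(0xffffffffu, 0) yields
// UINT64_C(0x5555555555555555).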

/// `mortonIndexInverse` separates the even and odd bits of z. The even bits
/// are returned in the first element of the result tuple and the odd bits in
/// the second.
#if defined(NO_SIMD) || !defined(__x86_64__)
    inline std::tuple<uint32_t, uint32_t> mortonIndexInverse(uint64_t z) {
        uint64_t x = z & UINT64_C(0x5555555555555555);
        uint64_t y = (z >> 1) & UINT64_C(0x5555555555555555);
        x = (x | (x >> 1)) & UINT64_C(0x3333333333333333);
        y = (y | (y >> 1)) & UINT64_C(0x3333333333333333);
        x = (x | (x >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
        y = (y | (y >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f);
        x = (x | (x >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
        y = (y | (y >> 4)) & UINT64_C(0x00ff00ff00ff00ff);
        x = (x | (x >> 8)) & UINT64_C(0x0000ffff0000ffff);
        y = (y | (y >> 8)) & UINT64_C(0x0000ffff0000ffff);
        return std::make_tuple(static_cast<uint32_t>(x | (x >> 16)),
                               static_cast<uint32_t>(y | (y >> 16)));
    }
#else
    inline __m128i mortonIndexInverseSimd(uint64_t z) {
        __m128i xy = _mm_set_epi64x(static_cast<int64_t>(z >> 1),
                                    static_cast<int64_t>(z));
        xy = _mm_and_si128(xy, _mm_set1_epi32(0x55555555));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 1)),
                           _mm_set1_epi32(0x33333333));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 2)),
                           _mm_set1_epi32(0x0f0f0f0f));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 4)),
                           _mm_set1_epi32(0x00ff00ff));
        xy = _mm_and_si128(_mm_or_si128(xy, _mm_srli_epi64(xy, 8)),
                           _mm_set1_epi32(0x0000ffff));
        xy = _mm_or_si128(xy, _mm_srli_epi64(xy, 16));
        return xy;
    }

    inline std::tuple<uint32_t, uint32_t> mortonIndexInverse(uint64_t z) {
        __m128i xy = mortonIndexInverseSimd(z);
        uint64_t r = _mm_cvtsi128_si64(_mm_shuffle_epi32(xy, 8));
        return std::make_tuple(static_cast<uint32_t>(r & 0xffffffff),
                               static_cast<uint32_t>(r >> 32));
    }
#endif
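
// Worked example of the de-interleaving above (illustrative):
// mortonIndexInverse(7) yields the tuple (3, 1), undoing mortonIndex(3, 1)
// == 7; more generally mortonIndexInverse(mortonIndex(x, y)) recovers
// (x, y) for any 32-bit x and y.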

/// `mortonToHilbert` converts the 2m-bit Morton index z to the corresponding
/// Hilbert index.
inline uint64_t mortonToHilbert(uint64_t z, int m) {
    alignas(64) static uint8_t const HILBERT_LUT_3[256] = {
        0x40, 0xc3, 0x01, 0x02, 0x04, 0x45, 0x87, 0x46,
        0x8e, 0x8d, 0x4f, 0xcc, 0x08, 0x49, 0x8b, 0x4a,
        0xfa, 0x3b, 0xf9, 0xb8, 0x7c, 0xff, 0x3d, 0x3e,
        0xf6, 0x37, 0xf5, 0xb4, 0xb2, 0xb1, 0x73, 0xf0,
        0x10, 0x51, 0x93, 0x52, 0xde, 0x1f, 0xdd, 0x9c,
        0x54, 0xd7, 0x15, 0x16, 0x58, 0xdb, 0x19, 0x1a,
        0x20, 0x61, 0xa3, 0x62, 0xee, 0x2f, 0xed, 0xac,
        0x64, 0xe7, 0x25, 0x26, 0x68, 0xeb, 0x29, 0x2a,
        0x00, 0x41, 0x83, 0x42, 0xce, 0x0f, 0xcd, 0x8c,
        0x44, 0xc7, 0x05, 0x06, 0x48, 0xcb, 0x09, 0x0a,
        0x50, 0xd3, 0x11, 0x12, 0x14, 0x55, 0x97, 0x56,
        0x9e, 0x9d, 0x5f, 0xdc, 0x18, 0x59, 0x9b, 0x5a,
        0xba, 0xb9, 0x7b, 0xf8, 0xb6, 0xb5, 0x77, 0xf4,
        0x3c, 0x7d, 0xbf, 0x7e, 0xf2, 0x33, 0xf1, 0xb0,
        0x60, 0xe3, 0x21, 0x22, 0x24, 0x65, 0xa7, 0x66,
        0xae, 0xad, 0x6f, 0xec, 0x28, 0x69, 0xab, 0x6a,
        0xaa, 0xa9, 0x6b, 0xe8, 0xa6, 0xa5, 0x67, 0xe4,
        0x2c, 0x6d, 0xaf, 0x6e, 0xe2, 0x23, 0xe1, 0xa0,
        0x9a, 0x99, 0x5b, 0xd8, 0x96, 0x95, 0x57, 0xd4,
        0x1c, 0x5d, 0x9f, 0x5e, 0xd2, 0x13, 0xd1, 0x90,
        0x70, 0xf3, 0x31, 0x32, 0x34, 0x75, 0xb7, 0x76,
        0xbe, 0xbd, 0x7f, 0xfc, 0x38, 0x79, 0xbb, 0x7a,
        0xca, 0x0b, 0xc9, 0x88, 0x4c, 0xcf, 0x0d, 0x0e,
        0xc6, 0x07, 0xc5, 0x84, 0x82, 0x81, 0x43, 0xc0,
        0xea, 0x2b, 0xe9, 0xa8, 0x6c, 0xef, 0x2d, 0x2e,
        0xe6, 0x27, 0xe5, 0xa4, 0xa2, 0xa1, 0x63, 0xe0,
        0x30, 0x71, 0xb3, 0x72, 0xfe, 0x3f, 0xfd, 0xbc,
        0x74, 0xf7, 0x35, 0x36, 0x78, 0xfb, 0x39, 0x3a,
        0xda, 0x1b, 0xd9, 0x98, 0x5c, 0xdf, 0x1d, 0x1e,
        0xd6, 0x17, 0xd5, 0x94, 0x92, 0x91, 0x53, 0xd0,
        0x8a, 0x89, 0x4b, 0xc8, 0x86, 0x85, 0x47, 0xc4,
        0x0c, 0x4d, 0x8f, 0x4e, 0xc2, 0x03, 0xc1, 0x80
    };
    uint64_t h = 0;
    uint64_t i = 0;
    for (m = 2 * m; m >= 6;) {
        m -= 6;
        uint8_t j = HILBERT_LUT_3[i | ((z >> m) & 0x3f)];
        h = (h << 6) | (j & 0x3f);
        i = j & 0xc0;
    }
    if (m != 0) {
        // m = 2 or 4
        int r = 6 - m;
        uint8_t j = HILBERT_LUT_3[i | ((z << r) & 0x3f)];
        h = (h << m) | ((j & 0x3f) >> r);
    }
    return h;
}
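
// Reading the loop above: each table lookup consumes six Morton bits (three
// levels of the curve) together with two bits of transformation state carried
// in i; the low six bits of the entry (j & 0x3f) supply the next six Hilbert
// bits and the high two bits (j & 0xc0) become the state for the next lookup.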

/// `hilbertToMorton` converts the 2m-bit Hilbert index h to the corresponding
/// Morton index.
inline uint64_t hilbertToMorton(uint64_t h, int m) {
    alignas(64) static uint8_t const HILBERT_INVERSE_LUT_3[256] = {
        0x40, 0x02, 0x03, 0xc1, 0x04, 0x45, 0x47, 0x86,
        0x0c, 0x4d, 0x4f, 0x8e, 0xcb, 0x89, 0x88, 0x4a,
        0x20, 0x61, 0x63, 0xa2, 0x68, 0x2a, 0x2b, 0xe9,
        0x6c, 0x2e, 0x2f, 0xed, 0xa7, 0xe6, 0xe4, 0x25,
        0x30, 0x71, 0x73, 0xb2, 0x78, 0x3a, 0x3b, 0xf9,
        0x7c, 0x3e, 0x3f, 0xfd, 0xb7, 0xf6, 0xf4, 0x35,
        0xdf, 0x9d, 0x9c, 0x5e, 0x9b, 0xda, 0xd8, 0x19,
        0x93, 0xd2, 0xd0, 0x11, 0x54, 0x16, 0x17, 0xd5,
        0x00, 0x41, 0x43, 0x82, 0x48, 0x0a, 0x0b, 0xc9,
        0x4c, 0x0e, 0x0f, 0xcd, 0x87, 0xc6, 0xc4, 0x05,
        0x50, 0x12, 0x13, 0xd1, 0x14, 0x55, 0x57, 0x96,
        0x1c, 0x5d, 0x5f, 0x9e, 0xdb, 0x99, 0x98, 0x5a,
        0x70, 0x32, 0x33, 0xf1, 0x34, 0x75, 0x77, 0xb6,
        0x3c, 0x7d, 0x7f, 0xbe, 0xfb, 0xb9, 0xb8, 0x7a,
        0xaf, 0xee, 0xec, 0x2d, 0xe7, 0xa5, 0xa4, 0x66,
        0xe3, 0xa1, 0xa0, 0x62, 0x28, 0x69, 0x6b, 0xaa,
        0xff, 0xbd, 0xbc, 0x7e, 0xbb, 0xfa, 0xf8, 0x39,
        0xb3, 0xf2, 0xf0, 0x31, 0x74, 0x36, 0x37, 0xf5,
        0x9f, 0xde, 0xdc, 0x1d, 0xd7, 0x95, 0x94, 0x56,
        0xd3, 0x91, 0x90, 0x52, 0x18, 0x59, 0x5b, 0x9a,
        0x8f, 0xce, 0xcc, 0x0d, 0xc7, 0x85, 0x84, 0x46,
        0xc3, 0x81, 0x80, 0x42, 0x08, 0x49, 0x4b, 0x8a,
        0x60, 0x22, 0x23, 0xe1, 0x24, 0x65, 0x67, 0xa6,
        0x2c, 0x6d, 0x6f, 0xae, 0xeb, 0xa9, 0xa8, 0x6a,
        0xbf, 0xfe, 0xfc, 0x3d, 0xf7, 0xb5, 0xb4, 0x76,
        0xf3, 0xb1, 0xb0, 0x72, 0x38, 0x79, 0x7b, 0xba,
        0xef, 0xad, 0xac, 0x6e, 0xab, 0xea, 0xe8, 0x29,
        0xa3, 0xe2, 0xe0, 0x21, 0x64, 0x26, 0x27, 0xe5,
        0xcf, 0x8d, 0x8c, 0x4e, 0x8b, 0xca, 0xc8, 0x09,
        0x83, 0xc2, 0xc0, 0x01, 0x44, 0x06, 0x07, 0xc5,
        0x10, 0x51, 0x53, 0x92, 0x58, 0x1a, 0x1b, 0xd9,
        0x5c, 0x1e, 0x1f, 0xdd, 0x97, 0xd6, 0xd4, 0x15
    };
    uint64_t z = 0;
    uint64_t i = 0;
    for (m = 2 * m; m >= 6;) {
        m -= 6;
        uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h >> m) & 0x3f)];
        z = (z << 6) | (j & 0x3f);
        i = j & 0xc0;
    }
    if (m != 0) {
        // m = 2 or 4
        int r = 6 - m;
        uint8_t j = HILBERT_INVERSE_LUT_3[i | ((h << r) & 0x3f)];
        z = (z << m) | ((j & 0x3f) >> r);
    }
    return z;
}
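
// HILBERT_INVERSE_LUT_3 encodes the inverse of the transitions in
// HILBERT_LUT_3, so round trips such as
// hilbertToMorton(mortonToHilbert(z, 8), 8) == z are expected to hold for any
// 16-bit Morton index z (and likewise in the other direction).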

/// `hilbertIndex` returns the index of (x, y) in a 2-D Hilbert curve, where
/// x and y are m bit integers.
inline uint64_t hilbertIndex(uint32_t x, uint32_t y, int m) {
    return mortonToHilbert(mortonIndex(x, y), m);
}

#if !defined(NO_SIMD) && defined(__x86_64__)
    inline uint64_t hilbertIndex(__m128i xy, int m) {
        return mortonToHilbert(mortonIndex(xy), m);
    }
#endif

/// `hilbertIndexInverse` returns the point (x, y) with Hilbert index h, where
/// x and y are m bit integers.
inline std::tuple<uint32_t, uint32_t> hilbertIndexInverse(uint64_t h, int m) {
    return mortonIndexInverse(hilbertToMorton(h, m));
}

#if !defined(NO_SIMD) && defined(__x86_64__)
    inline __m128i hilbertIndexInverseSimd(uint64_t h, int m) {
        return mortonIndexInverseSimd(hilbertToMorton(h, m));
    }
#endif
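
// Usage sketch (variable names here are illustrative, not part of the
// header): for m-bit coordinates x and y,
//
//     uint64_t h = hilbertIndex(x, y, m);
//     uint32_t xi, yi;
//     std::tie(xi, yi) = hilbertIndexInverse(h, m);
//
// is expected to recover xi == x and yi == y, since hilbertIndexInverse
// composes the inverses of the two steps performed by hilbertIndex.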

}} // namespace lsst::sphgeom

#endif // LSST_SPHGEOM_CURVE_H_