LSST Applications g180d380827+0f66a164bb,g2079a07aa2+86d27d4dc4,g2305ad1205+7d304bc7a0,g29320951ab+500695df56,g2bbee38e9b+0e5473021a,g337abbeb29+0e5473021a,g33d1c0ed96+0e5473021a,g3a166c0a6a+0e5473021a,g3ddfee87b4+e42ea45bea,g48712c4677+36a86eeaa5,g487adcacf7+2dd8f347ac,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+c70619cc9d,g5a732f18d5+53520f316c,g5ea96fc03c+341ea1ce94,g64a986408d+f7cd9c7162,g858d7b2824+f7cd9c7162,g8a8a8dda67+585e252eca,g99cad8db69+469ab8c039,g9ddcbc5298+9a081db1e4,ga1e77700b3+15fc3df1f7,gb0e22166c9+60f28cb32d,gba4ed39666+c2a2e4ac27,gbb8dafda3b+c92fc63c7e,gbd866b1f37+f7cd9c7162,gc120e1dc64+02c66aa596,gc28159a63d+0e5473021a,gc3e9b769f7+b0068a2d9f,gcf0d15dbbd+e42ea45bea,gdaeeff99f8+f9a426f77a,ge6526c86ff+84383d05b3,ge79ae78c31+0e5473021a,gee10cc3b42+585e252eca,gff1a9f87cc+f7cd9c7162,w.2024.17
LSST Data Management Base Package
Loading...
Searching...
No Matches
_id_generator.py
Go to the documentation of this file.
1# This file is part of meas_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = (
25 "IdGenerator",
26 "FullIdGenerator",
27 "BaseIdGeneratorConfig",
28 "DetectorExposureIdGeneratorConfig",
29 "DetectorVisitIdGeneratorConfig",
30 "SkyMapIdGeneratorConfig",
31)
32
33import dataclasses
34from typing import Any, Callable
35
36import numpy as np
37from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
38from lsst.daf.butler import DataCoordinate, DimensionPacker
39from lsst.pex.config import Config, ConfigField, Field
40from lsst.pipe.base import Instrument
41from lsst.skymap.packers import SkyMapDimensionPacker
42
# Module-level defaults for ID-generation configuration.  Both are read as
# `Field` defaults in `BaseIdGeneratorConfig`, so changing them here changes
# the default for every task config that embeds one of these configs.
DEFAULT_RELEASE_ID = 0
"""Default release ID to embed in catalog IDs.

This can be changed globally to avoid having to override individual task
configs to set the release ID.
"""

DEFAULT_N_RELEASES = 1  # 1 means don't reserve space for releases.
"""Default number of releases to reserve space for in catalog IDs."""
52
53
class BaseIdGeneratorConfig(Config):
    """Base class for configuration of `IdGenerator` instances.

    This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
    conflict), and it should mostly be considered an implementation detail
    of how the attributes it defines are included in its concrete derived
    classes.  Derived classes must implement `_make_dimension_packer`.

    See `IdGenerator` for usage.
    """

    release_id = Field(
        doc=(
            "Identifier for a data release or other version to embed in generated IDs. "
            "Zero is reserved for IDs with no embedded release identifier."
        ),
        dtype=int,
        default=DEFAULT_RELEASE_ID,
        check=lambda x: x >= 0,
    )

    n_releases = Field(
        doc=(
            "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
            "One (not zero) is used to reserve no space."
        ),
        dtype=int,
        default=DEFAULT_N_RELEASES,
        check=lambda x: x > 0,
    )

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field that holds an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.  As this configuration almost
            always plays the same role in any parent config, the default is
            usually fine.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field for instances of this class.

        Notes
        -----
        This method is provided as a convenience to reduce boilerplate
        downstream: it typically saves an import or two, and it allows the same
        usually-appropriate docstring to be reused instead of rewritten each
        time.  It does not need to be used in order to use this config class.
        """
        return ConfigField(doc, dtype=cls)

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.
        **kwargs
            Additional keyword arguments are interpreted as dimension value
            pairs to include in the data ID.  This may be used to provide
            constraints on dimensions for which records are not available.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release ID.

        Notes
        -----
        This method is called `apply` for consistency with the pattern of using
        `lsst.pex.config.ConfigurableField` and `lsst.pex.config.RegistryField`
        to construct the objects whose configuration they hold.  It doesn't
        actually use those mechanisms because we have many config classes for
        the one `IdGenerator` class, instead of the other way around, and as a
        result a "config as factory" approach works better.
        """
        # Delegate packer construction to the concrete subclass, then
        # standardize the data ID to exactly the dimensions the packer needs.
        packer = self._make_dimension_packer(data_id)
        return FullIdGenerator(
            packer,
            DataCoordinate.standardize(data_id, **kwargs, dimensions=packer.dimensions),
            release_id=self.release_id,
            n_releases=self.n_releases,
        )

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        """Abstract hook for building a dimension packer from configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs data IDs into integers.
        """
        raise NotImplementedError("Method is abstract.")
169
170
class DetectorExposureIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{exposure, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # Pack over exposure (not visit) dimensions.
        return self.packer.apply(data_id, is_exposure=True)
183
184
class DetectorVisitIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{visit, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # Pack over visit (not exposure) dimensions.
        return self.packer.apply(data_id, is_exposure=False)
197
198
class SkyMapIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{tract, patch, [band]}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = SkyMapDimensionPacker.make_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id)
211
212
class IdGenerator:
    """A helper class for packing some combination of a data ID, a per-data-ID
    counter, and a release ID into a single 64-bit integer.

    As an object frequently passed into code that otherwise has no knowledge of
    its own data ID, `IdGenerator` also implements ``__str__`` to provide a
    human-readable representation of the data ID for use in logs and exception
    messages, with a suitable fallback when no data ID was provided to it.

    Notes
    -----
    Instances of this class are expected to usually be created via
    configuration, which will return a derived instance.  This pattern starts
    with one of `DetectorExposureIdGeneratorConfig`,
    `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have
    the same interface), and looks something like this::

        from lsst.meas.base import DetectorVisitIdGeneratorConfig
        from lsst.pex.config import Config
        from lsst.pipe.base import PipelineTask

        class SomeTaskConfig(PipelineTaskConfig, ...):
            id_generator = DetectorVisitIdGeneratorConfig.make_field()

        class SomeTask(PipelineTask):

            ConfigClass = SomeTaskConfig

            ...

            def runQuantum(self, ..., data_id: DataCoordinate):
                id_generator = self.config.id_generator.apply(data_id)
                catalog = id_generator.make_source_catalog(self.schema)
                ...

    There is no requirement that `IdGenerator` instances be constructed in
    `PipelineTask.runQuantum` methods and passed to the ``run`` method, but
    this is the most common approach.

    Code that wishes to instead unpack these record IDs to obtain the release
    ID, data ID and counter value should use the same config (often loaded from
    the ``Butler``) and pass a fully-expanded data ID identifying only a
    particular ``skymap`` or ``instrument`` to `unpacker_from_config`::

        config = butler.get("some_task_config")
        catalog = butler.get("some_output_catalog", given_data_id)
        unpacker = IdGenerator.unpacker_from_config(
            config.id_generator, butler.registry.expandDataId(skymap="HSC"),
        )
        release_id, embedded_data_id, counter = unpacker(catalog[0]["id"])
        assert embedded_data_id == given_data_id

    This example is a bit contrived, as the ability to reconstruct the data ID
    is really only useful when you don't have it already, such as when the
    record ID is obtained from some further-processed version of the original
    table (such as a SQL database), and in that context the right config to
    load will not be obvious unless it has been carefully documented.

    Simple instances of the base class that do not include a data ID may also
    be constructed by calling the constructor directly::

        id_generator = IdGenerator()

    These IDs may not be unpacked, but they also don't need to be, because
    they're just the per-catalog "counter" integer already.

    See Also
    --------
    :ref:`lsst.meas.base-generating-source-and-object-ids`
    """

    @property
    def catalog_id(self) -> int:
        """The integer identifier for the full catalog with this data ID, not
        just one of its rows (`int`).

        This combines the packed data ID and release ID, but not the
        counter.
        """
        # Base class embeds no data ID or release ID, so the catalog ID is
        # trivially zero; see `FullIdGenerator` for the non-trivial override.
        return 0

    def __str__(self) -> str:
        """Return a human-readable representation of the data ID (or a note
        about its absence) for use in log and error messages.
        """
        return "[no data ID]"

    def make_table_id_factory(self) -> IdFactory:
        """Construct a new `lsst.afw.table.IdFactory` for this catalog."""
        return IdFactory.makeSimple()

    def make_source_catalog(self, schema: Schema) -> SourceCatalog:
        """Construct an empty catalog object with an ID factory.

        This is a convenience function for the common pattern of calling
        `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable`
        from that, and then constructing an (empty)
        `~lsst.afw.table.SourceCatalog` from that.
        """
        table = SourceTable.make(schema, self.make_table_id_factory())
        return SourceCatalog(table)

    def arange(self, *args, **kwargs) -> np.ndarray:
        """Generate an array of integer IDs for this catalog.

        All parameters are forwarded to `numpy.arange` to generate an array of
        per-catalog counter integers.  These are then combined with the
        `catalog_id` to form the returned array.

        The IDs generated by `arange` will be equivalent to those generated by
        `make_table_id_factory` (and by extension, `make_source_catalog`) only
        if the counter integers start with ``1``, not ``0``, because that's
        what `~lsst.afw.table.IdFactory` does.
        """
        return np.arange(*args, **kwargs)

    @classmethod
    def unpacker_from_config(
        cls,
        config: BaseIdGeneratorConfig,
        fixed: DataCoordinate,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a config field.

        Parameters
        ----------
        config : `BaseIdGeneratorConfig`
            Configuration for an ID generator.
        fixed : `DataCoordinate`
            Data ID identifying the dimensions that are considered fixed by the
            `IdGenerator` that produced the IDs: usually just ``instrument`` or
            ``skymap``, depending on the configuration.  For most configurations
            this will need to be a fully-expanded data ID.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-configured `IdGenerator`) and returns a tuple of:

            - release_id: the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated without a data ID.
        """
        packer = config._make_dimension_packer(fixed)
        return cls.unpacker_from_dimension_packer(packer, config.n_releases)

    @classmethod
    def unpacker_from_dimension_packer(
        cls,
        dimension_packer: DimensionPacker,
        n_releases: int = DEFAULT_N_RELEASES,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a `lsst.daf.butler.DimensionPacker` instance.

        Parameters
        ----------
        dimension_packer : `lsst.daf.butler.DimensionPacker`
            Dimension packer used to construct the original
            `DimensionPackerIdGenerator`.
        n_releases : `int`, optional
            Number of (contiguous, starting from zero) ``release_id`` values to
            reserve space for.  One (not zero) is used to reserve no space.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-constructed `DimensionPackerIdGenerator`) and returns a
            tuple of:

            - release_id: the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated with no data ID.
        """
        bits = _IdGeneratorBits(dimension_packer, n_releases)

        def unpack(record_id: int) -> tuple[int, DataCoordinate, int]:
            # Peel off the fields in reverse order of how they were packed:
            # counter in the low bits, then the packed data ID, then the
            # release ID (which occupies [0, n_releases)).
            rest, counter = divmod(record_id, bits.n_counters)
            rest, packed_data_id = divmod(rest, bits.n_data_ids)
            rest, release_id = divmod(rest, bits.n_releases)
            if rest:
                raise ValueError(
                    f"Unexpected overall factor {rest} in record_id {record_id}, "
                    f"after extracting packed_data_id={packed_data_id}, counter={counter}, and "
                    f"release_id={release_id}."
                )
            data_id = bits.packer.unpack(packed_data_id)
            return release_id, data_id, counter

        return unpack
417
418
class FullIdGenerator(IdGenerator):
    """The subclass of `IdGenerator` that actually includes packed data IDs
    and release IDs in its generated IDs.

    Parameters
    ----------
    dimension_packer : `lsst.daf.butler.DimensionPacker`
        Object that packs data IDs into integers.
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID to embed in all generated IDs and random seeds.
    release_id : `int`, optional
        Release identifier to embed in generated IDs.
    n_releases : `int`, optional
        Number of (contiguous, starting from zero) `release_id` values to
        reserve space for.  One (not zero) is used to reserve no space.

    Notes
    -----
    Instances of this class should usually be constructed via configuration
    instead of by calling the constructor directly; see `IdGenerator` for
    details.
    """

    def __init__(
        self,
        dimension_packer: DimensionPacker,
        data_id: DataCoordinate,
        release_id: int = DEFAULT_RELEASE_ID,
        n_releases: int = DEFAULT_N_RELEASES,
    ):
        self._bits = _IdGeneratorBits(dimension_packer, n_releases)
        self._release_id = release_id
        # Keep only the dimensions the packer actually uses, and pack once up
        # front; the packed value is reused by every ID this instance makes.
        self._data_id = data_id.subset(self._bits.packer.dimensions)
        self._packed_data_id = self._bits.packer.pack(self._data_id)

    @property
    def data_id(self) -> DataCoordinate:
        """The data ID that will be embedded in all generated IDs
        (`DataCoordinate`)."""
        return self._data_id

    @property
    def release_id(self) -> int:
        """The release ID that will be embedded in all generated IDs (`int`)."""
        return self._release_id

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        return self._packed_data_id + self._bits.n_data_ids * self._release_id

    def __str__(self) -> str:
        # Docstring inherited.
        return str(self.data_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return IdFactory.makeSource(self.catalog_id, self._bits.counter_bits)

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        lower = super().arange(*args, **kwargs)
        # Refuse to generate IDs whose counter part would overflow into the
        # catalog-ID bits, which would make the IDs ambiguous.
        if np.any(lower >= self._bits.n_counters):
            arg_terms = [repr(arg) for arg in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
            raise ValueError(
                f"Integer range from numpy.arange({arg_terms}) has "
                f"{(lower >= self._bits.n_counters).sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return lower + self.catalog_id * self._bits.n_counters
489
490
@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between the
    packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=0)
    """Number of releases to reserve space for, starting from zero (`int`)."""

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting from
    zero (`int`)."""

    def __post_init__(self) -> None:
        # Space needed for the packed data ID.
        self.n_data_ids = 1 << self.packer.maxBits
        # Bits consumed by the release ID and packed data ID together; the
        # counter gets whatever IdFactory says is left over.
        upper_bits = (self.n_releases - 1).bit_length() + self.packer.maxBits
        self.counter_bits = IdFactory.computeReservedFromMaxBits(upper_bits)
        self.n_counters = 1 << self.counter_bits
IdGenerator apply(self, DataCoordinate data_id, **Any kwargs)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
make_field(cls, doc="Configuration for how to generate catalog IDs from data IDs.")
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
__init__(self, DimensionPacker dimension_packer, DataCoordinate data_id, int release_id=DEFAULT_RELEASE_ID, int n_releases=DEFAULT_N_RELEASES)
np.ndarray arange(self, *args, **kwargs)
SourceCatalog make_source_catalog(self, Schema schema)
np.ndarray arange(self, *args, **kwargs)
Callable[[int], tuple[int, DataCoordinate, int]] unpacker_from_dimension_packer(cls, DimensionPacker dimension_packer, int n_releases=DEFAULT_N_RELEASES)
Callable[[int], tuple[DataCoordinate, int]] unpacker_from_config(cls, BaseIdGeneratorConfig config, DataCoordinate fixed)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)