LSST Applications 26.0.0,g0265f82a02+6660c170cc,g07994bdeae+30b05a742e,g0a0026dc87+17526d298f,g0a60f58ba1+17526d298f,g0e4bf8285c+96dd2c2ea9,g0ecae5effc+c266a536c8,g1e7d6db67d+6f7cb1f4bb,g26482f50c6+6346c0633c,g2bbee38e9b+6660c170cc,g2cc88a2952+0a4e78cd49,g3273194fdb+f6908454ef,g337abbeb29+6660c170cc,g337c41fc51+9a8f8f0815,g37c6e7c3d5+7bbafe9d37,g44018dc512+6660c170cc,g4a941329ef+4f7594a38e,g4c90b7bd52+5145c320d2,g58be5f913a+bea990ba40,g635b316a6c+8d6b3a3e56,g67924a670a+bfead8c487,g6ae5381d9b+81bc2a20b4,g93c4d6e787+26b17396bd,g98cecbdb62+ed2cb6d659,g98ffbb4407+81bc2a20b4,g9ddcbc5298+7f7571301f,ga1e77700b3+99e9273977,gae46bcf261+6660c170cc,gb2715bf1a1+17526d298f,gc86a011abf+17526d298f,gcf0d15dbbd+96dd2c2ea9,gdaeeff99f8+0d8dbea60f,gdb4ec4c597+6660c170cc,ge23793e450+96dd2c2ea9,gf041782ebf+171108ac67
LSST Data Management Base Package
Loading...
Searching...
No Matches
_id_generator.py
Go to the documentation of this file.
1# This file is part of meas_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
# Public API of this module; kept in declaration order (base class after the
# concrete `IdGenerator` types that users interact with most).
__all__ = (
    "IdGenerator",
    "FullIdGenerator",
    "BaseIdGeneratorConfig",
    "DetectorExposureIdGeneratorConfig",
    "DetectorVisitIdGeneratorConfig",
    "SkyMapIdGeneratorConfig",
)
32
33import dataclasses
34from typing import Any, Callable
35
36import numpy as np
37from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
38from lsst.daf.butler import DataCoordinate, DimensionPacker
39from lsst.obs.base import ExposureIdInfo
40from lsst.pex.config import Config, ConfigField, Field
41from lsst.pipe.base import Instrument
42from lsst.skymap.packers import SkyMapDimensionPacker
43
DEFAULT_RELEASE_ID = 0
"""Default release ID to embed in catalog IDs.

This can be changed globally to avoid having to override individual task
configs to set the release ID.
"""

DEFAULT_N_RELEASES = 1  # 1 means don't reserve space for releases.
"""Default number of releases to reserve space for in catalog IDs."""
53
54
class BaseIdGeneratorConfig(Config):
    """Base class for configuration of `IdGenerator` instances.

    This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
    conflict), and it should mostly be considered an implementation detail
    of how the attributes it defines are included in its concrete derived
    classes.  Derived classes must implement `_make_dimension_packer`.

    See `IdGenerator` for usage.
    """

    release_id = Field(
        doc=(
            "Identifier for a data release or other version to embed in generated IDs. "
            "Zero is reserved for IDs with no embedded release identifier."
        ),
        dtype=int,
        default=DEFAULT_RELEASE_ID,
        check=lambda x: x >= 0,
    )

    n_releases = Field(
        doc=(
            "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
            "One (not zero) is used to reserve no space."
        ),
        dtype=int,
        default=DEFAULT_N_RELEASES,
        check=lambda x: x > 0,
    )

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field that holds an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.  As this configuration almost
            always plays the same role in any parent config, the default is
            usually fine.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field for instances of this class.

        Notes
        -----
        This method is provided as a convenience to reduce boilerplate
        downstream: it typically saves an import or two, and it allows the same
        usually-appropriate docstring to be reused instead of rewritten each
        time.  It does not need to be used in order to use this config class.
        """
        return ConfigField(doc, dtype=cls)

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.
        **kwargs
            Additional keyword arguments are interpreted as dimension value
            pairs to include in the data ID.  This may be used to provide
            constraints on dimensions for which records are not available.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release ID.

        Notes
        -----
        This method is called `apply` for consistency with the pattern of
        using `lsst.pex.config.ConfigurableField` and similar config types to
        construct the objects whose configuration they hold.  It doesn't
        actually use those mechanisms because we have many config classes for
        the one `IdGenerator` class, instead of the other way around, and as a
        result a "config as factory" approach works better.
        """
        packer = self._make_dimension_packer(data_id)
        return FullIdGenerator(
            packer,
            DataCoordinate.standardize(data_id, **kwargs, graph=packer.dimensions),
            release_id=self.release_id,
            n_releases=self.n_releases,
        )

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        """Abstract hook for building a dimension packer from configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs data IDs into integers.
        """
        raise NotImplementedError("Method is abstract.")
170
171
class DetectorExposureIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{exposure, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id, is_exposure=True)
184
185
class DetectorVisitIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{visit, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id, is_exposure=False)
198
199
class SkyMapIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{tract, patch, [band]}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = SkyMapDimensionPacker.make_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id)
212
213
class IdGenerator:
    """A helper class for packing some combination of a data ID, a per-data-ID
    counter, and a release ID into a single 64-bit integer.

    As an object frequently passed into code that otherwise has no knowledge of
    its own data ID, `IdGenerator` also implements ``__str__`` to provide a
    human-readable representation of the data ID for use in logs and exception
    messages, with a suitable fallback when no data ID was provided to it.

    Notes
    -----
    Instances of this class are expected to usually be created via
    configuration, which will return a derived instance.  This pattern starts
    with one of `DetectorExposureIdGeneratorConfig`,
    `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have
    the same interface), and looks something like this::

        from lsst.meas.base import DetectorVisitIdGeneratorConfig
        from lsst.pex.config import Config
        from lsst.pipe.base import PipelineTask

        class SomeTaskConfig(PipelineTaskConfig, ...):
            id_generator = DetectorVisitIdGeneratorConfig.make_field()

        class SomeTask(PipelineTask):

            ConfigClass = SomeTaskConfig

            ...

            def runQuantum(self, ..., data_id: DataCoordinate):
                id_generator = self.config.id_generator.apply(data_id)
                catalog = id_generator.make_source_catalog(self.schema) ...

    There is no requirement that `IdGenerator` instances be constructed in
    `PipelineTask.runQuantum` methods and passed to the ``run`` method, but
    this is the most common approach.

    Code that wishes to instead unpack these record IDs to obtain the release
    ID, data ID and counter value should use the same config (often loaded from
    the ``Butler``) and pass a fully-expanded data ID identifying only a
    particular ``skymap`` or ``instrument`` to `unpacker_from_config`::

        config = butler.get("some_task_config")
        catalog = butler.get("some_output_catalog", given_data_id)
        unpacker = IdGenerator.unpacker_from_config(
            config.id_generator, butler.registry.expandDataId(skymap="HSC"),
        )
        release_id, embedded_data_id, counter = unpacker(catalog[0]["id"])
        assert embedded_data_id == given_data_id

    This example is a bit contrived, as the ability to reconstruct the data ID
    is really only useful when you don't have it already, such as when the
    record ID is obtained from some further-processed version of the original
    table (such as a SQL database), and in that context the right config to
    load will not be obvious unless it has been carefully documented.

    Simple instances of the base class that do not include a data ID may also
    be constructed by calling the constructor directly::

        id_generator = IdGenerator()

    These IDs may not be unpacked, but they also don't need to be, because
    they're just the per-catalog "counter" integer already.

    See Also
    --------
    :ref:`lsst.meas.base-generating-source-and-object-ids`
    """

    # TODO: remove this method on DM-38687.
    # No deprecation decorator here because the type this method accepts is
    # itself deprecated, so it's only going to be called by code paths that
    # will go away when the deprecation turns into a removal, and which already
    # warn.
    @staticmethod
    def _from_exposure_id_info(exposure_id_info: ExposureIdInfo) -> IdGenerator:
        """Construct a new ID generator from the object this class supersedes.

        This method is deprecated along with the type it accepts; it's provided
        only as a temporary helper to aid in the transition from
        `lsst.obs.base.ExposureIdInfo` to `IdGenerator`.
        """
        return _ExposureIdInfoIdGenerator(exposure_id_info)

    @property
    def catalog_id(self) -> int:
        """The integer identifier for the full catalog with this data ID, not
        just one of its rows (`int`).

        This combines the packed data ID and release ID, but not the
        counter.
        """
        # The base class embeds no data ID or release ID, so the catalog
        # component of every generated ID is zero.
        return 0

    def __str__(self) -> str:
        """Return a human-readable representation of the data ID (or a note
        about its absence) for use in log and error messages.
        """
        return "[no data ID]"

    def make_table_id_factory(self) -> IdFactory:
        """Construct a new `lsst.afw.table.IdFactory` for this catalog."""
        return IdFactory.makeSimple()

    def make_source_catalog(self, schema: Schema) -> SourceCatalog:
        """Construct an empty catalog object with an ID factory.

        This is a convenience function for the common pattern of calling
        `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable`
        from that, and then constructing an (empty)
        `~lsst.afw.table.SourceCatalog` from that.
        """
        table = SourceTable.make(schema, self.make_table_id_factory())
        return SourceCatalog(table)

    def arange(self, *args, **kwargs) -> np.ndarray:
        """Generate an array of integer IDs for this catalog.

        All parameters are forwarded to `numpy.arange` to generate an array of
        per-catalog counter integers.  These are then combined with the
        `catalog_id` to form the returned array.

        The IDs generated by `arange` will be equivalent to those generated by
        `make_table_id_factory` (and by extension, `make_source_catalog`) only
        if the counter integers start with ``1``, not ``0``, because that's
        what `~lsst.afw.table.IdFactory` does.
        """
        return np.arange(*args, **kwargs)

    @classmethod
    def unpacker_from_config(
        cls,
        config: BaseIdGeneratorConfig,
        fixed: DataCoordinate,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a config field.

        Parameters
        ----------
        config : `BaseIdGeneratorConfig`
            Configuration for an ID generator.
        fixed : `DataCoordinate`
            Data ID identifying the dimensions that are considered fixed by
            the `IdGenerator` that produced the IDs: usually just
            ``instrument`` or ``skymap``, depending on the configuration.  For
            most configurations this will need to be a fully-expanded data ID.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-configured `IdGenerator`) and returns a tuple of:

            - release_id: the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated without a data ID.
        """
        packer = config._make_dimension_packer(fixed)
        return cls.unpacker_from_dimension_packer(packer, config.n_releases)

    @classmethod
    def unpacker_from_dimension_packer(
        cls,
        dimension_packer: DimensionPacker,
        n_releases: int = DEFAULT_N_RELEASES,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a `lsst.daf.butler.DimensionPacker` instance.

        Parameters
        ----------
        dimension_packer : `lsst.daf.butler.DimensionPacker`
            Dimension packer used to construct the original
            `DimensionPackerIdGenerator`.
        n_releases : `int`, optional
            Number of (contiguous, starting from zero) ``release_id`` values to
            reserve space for.  One (not zero) is used to reserve no space.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-constructed `DimensionPackerIdGenerator`) and returns a
            tuple of:

            - release_id: the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated with no data ID.
        """
        bits = _IdGeneratorBits(dimension_packer, n_releases)

        def unpack(record_id: int) -> tuple[int, DataCoordinate, int]:
            # Invert the packing performed by FullIdGenerator: the counter
            # occupies the lowest bits, then the packed data ID, then the
            # release ID.
            rest, counter = divmod(record_id, bits.n_counters)
            rest, packed_data_id = divmod(rest, bits.n_data_ids)
            # Divide by the release-ID modulus here (not the data-ID modulus);
            # any nonzero remainder afterwards means this ID could not have
            # been produced by an identically-configured generator.
            rest, release_id = divmod(rest, bits.n_releases)
            if rest:
                raise ValueError(
                    f"Unexpected overall factor {rest} in record_id {record_id}, "
                    f"after extracting packed_data_id={packed_data_id}, counter={counter}, and "
                    f"release_id={release_id}."
                )
            data_id = bits.packer.unpack(packed_data_id)
            return release_id, data_id, counter

        return unpack
433
434
class FullIdGenerator(IdGenerator):
    """The subclass of `IdGenerator` that actually includes packed data IDs
    and release IDs in its generated IDs.

    Parameters
    ----------
    dimension_packer : `lsst.daf.butler.DimensionPacker`
        Object that packs data IDs into integers.
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID to embed in all generated IDs and random seeds.
    release_id : `int`, optional
        Release identifier to embed in generated IDs.
    n_releases : `int`, optional
        Number of (contiguous, starting from zero) `release_id` values to
        reserve space for.  One (not zero) is used to reserve no space.

    Notes
    -----
    Instances of this class should usually be constructed via configuration
    instead of by calling the constructor directly; see `IdGenerator` for
    details.
    """

    def __init__(
        self,
        dimension_packer: DimensionPacker,
        data_id: DataCoordinate,
        release_id: int = DEFAULT_RELEASE_ID,
        n_releases: int = DEFAULT_N_RELEASES,
    ):
        self._bits = _IdGeneratorBits(dimension_packer, n_releases)
        self._release_id = release_id
        # Drop any dimensions the packer does not use, then pack once up
        # front; the packed value is reused by every generated ID.
        self._data_id = data_id.subset(self._bits.packer.dimensions)
        self._packed_data_id = self._bits.packer.pack(self._data_id)

    @property
    def data_id(self) -> DataCoordinate:
        """The data ID that will be embedded in all generated IDs
        (`DataCoordinate`)."""
        return self._data_id

    @property
    def release_id(self) -> int:
        """The release ID that will embedded in all generated IDs (`int`)."""
        return self._release_id

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        return self._packed_data_id + self._bits.n_data_ids * self._release_id

    def __str__(self) -> str:
        # Docstring inherited.
        return str(self.data_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return IdFactory.makeSource(self.catalog_id, self._bits.counter_bits)

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        lower = super().arange(*args, **kwargs)
        if np.any(lower >= self._bits.n_counters):
            arg_terms = [repr(arg) for arg in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
            raise ValueError(
                f"Integer range from numpy.arange({arg_terms}) has "
                f"{(lower >= self._bits.n_counters).sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return lower + self.catalog_id * self._bits.n_counters
505
506
@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between the
    packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=0)
    """Number of releases to reserve space for, starting from zero (`int`)."""

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting from
    zero (`int`)."""

    def __post_init__(self) -> None:
        # Reserve the full range of packed values the packer can produce.
        self.n_data_ids = 1 << self.packer.maxBits
        # Bits needed above the counter: packed data ID plus enough bits to
        # represent (n_releases - 1); the counter gets everything below that.
        upper_bits = (self.n_releases - 1).bit_length() + self.packer.maxBits
        self.counter_bits = IdFactory.computeReservedFromMaxBits(upper_bits)
        # counter_bits bits of counter -> 2**counter_bits distinct values.
        self.n_counters = 1 << self.counter_bits
539
540# TODO: remove this method on DM-38687.
541# No deprecation decorator here because the type this class holds is itself
542# deprecated, so it's only going to be called by code paths that will go away
543# when the deprecation turns into a removal, and which already warn.
545 """A `IdGenerator` implementation to aid in the transition from
546 `lsst.obs.base.ExposureIdInfo`.
547 """
548
549 def __init__(self, exposure_id_info: ExposureIdInfo):
550 self._exposure_id_info = exposure_id_info
551
552 @property
553 def catalog_id(self) -> int:
554 # Docstring inherited.
555 return self._exposure_id_info.expId
556
557 def __str__(self) -> str:
559
560 def make_table_id_factory(self) -> IdFactory:
561 # Docstring inherited.
562 return self._exposure_id_info.makeSourceIdFactory()
563
564 def arange(self, *args, **kwargs) -> np.ndarray:
565 # Docstring inherited.
566 raise NotImplementedError(
567 "This IdGenerator implementation does not support arange; "
568 "please update to IdGenerator.from_config for a full-featured implementation."
569 )
char * data
Definition BaseRecord.cc:61
table::Key< int > to
table::Key< int > a
A polymorphic functor base class for generating record IDs for a table.
Definition IdFactory.h:21
Table class that contains measurements made on a single exposure.
Definition Source.h:217
__init__(self, ExposureIdInfo exposure_id_info)
IdGenerator apply(self, DataCoordinate data_id, **Any kwargs)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
make_field(cls, doc="Configuration for how to generate catalog IDs from data IDs.")
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
__init__(self, DimensionPacker dimension_packer, DataCoordinate data_id, int release_id=DEFAULT_RELEASE_ID, int n_releases=DEFAULT_N_RELEASES)
np.ndarray arange(self, *args, **kwargs)
SourceCatalog make_source_catalog(self, Schema schema)
np.ndarray arange(self, *args, **kwargs)
IdGenerator _from_exposure_id_info(ExposureIdInfo exposure_id_info)
Callable[[int], tuple[int, DataCoordinate, int]] unpacker_from_dimension_packer(cls, DimensionPacker dimension_packer, int n_releases=DEFAULT_N_RELEASES)
Callable[[int], tuple[DataCoordinate, int]] unpacker_from_config(cls, BaseIdGeneratorConfig config, DataCoordinate fixed)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
runQuantum(self, butlerQC, inputRefs, outputRefs)