LSST Applications g070148d5b3+33e5256705,g0d53e28543+25c8b88941,g0da5cf3356+2dd1178308,g1081da9e2a+62d12e78cb,g17e5ecfddb+7e422d6136,g1c76d35bf8+ede3a706f7,g295839609d+225697d880,g2e2c1a68ba+cc1f6f037e,g2ffcdf413f+853cd4dcde,g38293774b4+62d12e78cb,g3b44f30a73+d953f1ac34,g48ccf36440+885b902d19,g4b2f1765b6+7dedbde6d2,g5320a0a9f6+0c5d6105b6,g56b687f8c9+ede3a706f7,g5c4744a4d9+ef6ac23297,g5ffd174ac0+0c5d6105b6,g6075d09f38+66af417445,g667d525e37+2ced63db88,g670421136f+2ced63db88,g71f27ac40c+2ced63db88,g774830318a+463cbe8d1f,g7876bc68e5+1d137996f1,g7985c39107+62d12e78cb,g7fdac2220c+0fd8241c05,g96f01af41f+368e6903a7,g9ca82378b8+2ced63db88,g9d27549199+ef6ac23297,gabe93b2c52+e3573e3735,gb065e2a02a+3dfbe639da,gbc3249ced9+0c5d6105b6,gbec6a3398f+0c5d6105b6,gc9534b9d65+35b9f25267,gd01420fc67+0c5d6105b6,geee7ff78d7+a14128c129,gf63283c776+ede3a706f7,gfed783d017+0c5d6105b6,w.2022.47
LSST Data Management Base Package
Loading...
Searching...
No Matches
functors.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
23 "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
24 "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
25 "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
26 "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
28 "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
29 "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
30 "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
31 "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
33 "Ratio", "Ebv"]
34
import logging
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml
from astropy.coordinates import SkyCoord
from dustmaps.sfd import SFDQuery

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.daf.butler import DeferredDatasetHandle
from lsst.utils import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
53
54
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Name of the functor being constructed; used only in the error
        message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is not lost.
        raise type(e)(message, e.args) from e
    return element
92
93
95 """Define and execute a calculation on a ParquetTable
96
97 The `__call__` method accepts either a `ParquetTable` object or a
98 `DeferredDatasetHandle`, and returns the
99 result of the calculation as a single column. Each functor defines what
100 columns are needed for the calculation, and only these columns are read
101 from the `ParquetTable`.
102
103 The action of `__call__` consists of two steps: first, loading the
104 necessary columns from disk into memory as a `pandas.DataFrame` object;
105 and second, performing the computation on this dataframe and returning the
106 result.
107
108
109 To define a new `Functor`, a subclass must define a `_func` method,
110 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
111 In addition, it must define the following attributes
112
113 * `_columns`: The columns necessary to perform the calculation
114 * `name`: A name appropriate for a figure axis label
115 * `shortname`: A name appropriate for use as a dictionary key
116
117 On initialization, a `Functor` should declare what band (`filt` kwarg)
118 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
119 applied to. This enables the `_get_data` method to extract the proper
120 columns from the parquet file. If not specified, the dataset will fall back
121 on the `_defaultDataset`attribute. If band is not specified and `dataset`
122 is anything other than `'ref'`, then an error will be raised when trying to
123 perform the calculation.
124
125 Originally, `Functor` was set up to expect
126 datasets formatted like the `deepCoadd_obj` dataset; that is, a
127 dataframe with a multi-level column index, with the levels of the
128 column index being `band`, `dataset`, and `column`.
129 It has since been generalized to apply to dataframes without mutli-level
130 indices and multi-level indices with just `dataset` and `column` levels.
131 In addition, the `_get_data` method that reads
132 the dataframe from the `ParquetTable` will return a dataframe with column
133 index levels defined by the `_dfLevels` attribute; by default, this is
134 `column`.
135
136 The `_dfLevels` attributes should generally not need to
137 be changed, unless `_func` needs columns from multiple filters or datasets
138 to do the calculation.
139 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
140 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
141 it gets to have those levels in the column index.
142
143 Parameters
144 ----------
145 filt : str
146 Filter upon which to do the calculation
147
148 dataset : str
149 Dataset upon which to do the calculation
150 (e.g., 'ref', 'meas', 'forced_src').
151
152 """
153
154 _defaultDataset = 'ref'
155 _dfLevels = ('column',)
156 _defaultNoDup = False
157
158 def __init__(self, filt=None, dataset=None, noDup=None):
159 self.filt = filt
160 self.dataset = dataset if dataset is not None else self._defaultDataset
161 self._noDup = noDup
162 self.log = logging.getLogger(type(self).__name__)
163
164 @property
165 def noDup(self):
166 if self._noDup is not None:
167 return self._noDup
168 else:
169 return self._defaultNoDup
170
171 @property
172 def columns(self):
173 """Columns required to perform calculation
174 """
175 if not hasattr(self, '_columns'):
176 raise NotImplementedError('Must define columns property or _columns attribute')
177 return self._columns
178
    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A columnIndex (passed in or just read from the handle) carries the
        # level names directly.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")
202
    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Map each level name to the sorted unique values occupying that
            # position of the column tuples.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")
224
    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        # ``data`` is not needed by _get_data_columnLevels when a columnIndex
        # is supplied, hence the None first argument.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Promote a bare string to a one-element list.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Keep only combinations that actually exist in the table.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable
246
    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification. This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent, so any band works.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict
296
    def _func(self, df, dropna=True):
        # Abstract: subclasses implement the per-dataframe calculation here.
        # NOTE(review): ``dropna`` appears unused by implementations in this
        # file — confirm before removing.
        raise NotImplementedError('Must define calculation on dataframe')
299
300 def _get_columnIndex(self, data):
301 """Return columnIndex
302 """
303
304 if isinstance(data, DeferredDatasetHandle):
305 return data.get(component="columns")
306 else:
307 return None
308
    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df
349
350 def _setLevels(self, df):
351 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
352 df.columns = df.columns.droplevel(levelsToDrop)
353 return df
354
    def _dropna(self, vals):
        # Remove NaN entries from the result Series.
        return vals.dropna()
357
358 def __call__(self, data, dropna=False):
359 try:
360 df = self._get_data(data)
361 vals = self._func(df)
362 except Exception as e:
363 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
364 vals = self.fail(df)
365 if dropna:
366 vals = self._dropna(vals)
367
368 return vals
369
370 def difference(self, data1, data2, **kwargs):
371 """Computes difference between functor called on two different ParquetTable objects
372 """
373 return self(data1, **kwargs) - self(data2, **kwargs)
374
375 def fail(self, df):
376 return pd.Series(np.full(len(df), np.nan), index=df.index)
377
    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the NotImplementedError class instead of
        # raising it — likely a bug, but the error-log path in ``__call__``
        # formats ``name`` lazily and tolerates it; confirm before changing.
        return NotImplementedError
383
    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        # Defaults to the full name; subclasses override when that is too long.
        return self.name
389
390
392 """Perform multiple calculations at once on a catalog
393
394 The role of a `CompositeFunctor` is to group together computations from
395 multiple functors. Instead of returning `pandas.Series` a
396 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
397 being the keys of `funcDict`.
398
399 The `columns` attribute of a `CompositeFunctor` is the union of all columns
400 in all the component functors.
401
402 A `CompositeFunctor` does not use a `_func` method itself; rather,
403 when a `CompositeFunctor` is called, all its columns are loaded
404 at once, and the resulting dataframe is passed to the `_func` method of each component
405 functor. This has the advantage of only doing I/O (reading from parquet file) once,
406 and works because each individual `_func` method of each component functor does not
407 care if there are *extra* columns in the dataframe being passed; only that it must contain
408 *at least* the `columns` it expects.
409
410 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
411 file specifying a collection of functors.
412
413 Parameters
414 ----------
415 funcs : `dict` or `list`
416 Dictionary or list of functors. If a list, then it will be converted
417 into a dictonary according to the `.shortname` attribute of each functor.
418
419 """
420 dataset = None
421
422 def __init__(self, funcs, **kwargs):
423
424 if type(funcs) == dict:
425 self.funcDict = funcs
426 else:
427 self.funcDict = {f.shortname: f for f in funcs}
428
429 self._filt = None
430
431 super().__init__(**kwargs)
432
    @property
    def filt(self):
        """Band applied to every component functor."""
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to all component functors before recording it.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt
443
444 def update(self, new):
445 if isinstance(new, dict):
446 self.funcDict.update(new)
447 elif isinstance(new, CompositeFunctor):
448 self.funcDict.update(new.funcDict)
449 else:
450 raise TypeError('Can only update with dictionary or CompositeFunctor.')
451
452 # Make sure new functors have the same 'filt' set
453 if self.filtfiltfiltfilt is not None:
455
456 @property
457 def columns(self):
458 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
459
460 def multilevelColumns(self, data, **kwargs):
461 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
462 return list(
463 set(
464 [
465 x
466 for y in [
467 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
468 ]
469 for x in y
470 ]
471 )
472 )
473
474 def __call__(self, data, **kwargs):
475 """Apply the functor to the data table
476
477 Parameters
478 ----------
479 data : `lsst.daf.butler.DeferredDatasetHandle`,
482 or `pandas.DataFrame`.
483 The table or a pointer to a table on disk from which columns can
484 be accessed
485 """
486 columnIndex = self._get_columnIndex(data)
487
488 # First, determine whether data has a multilevel index (either gen2 or gen3)
489 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
490
491 # Multilevel index, gen2 or gen3
492 if is_multiLevel:
493 columns = self.multilevelColumnsmultilevelColumns(data, columnIndex=columnIndex)
494
495 if isinstance(data, MultilevelParquetTable):
496 # Read data into memory the gen2 way
497 df = data.toDataFrame(columns=columns, droplevels=False)
498 elif isinstance(data, DeferredDatasetHandle):
499 # Read data into memory the gen3 way
500 df = data.get(parameters={"columns": columns})
501
502 valDict = {}
503 for k, f in self.funcDict.items():
504 try:
505 subdf = f._setLevels(
506 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
507 )
508 valDict[k] = f._func(subdf)
509 except Exception as e:
510 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
511 try:
512 valDict[k] = f.fail(subdf)
513 except NameError:
514 raise e
515
516 else:
517 if isinstance(data, DeferredDatasetHandle):
518 # input if Gen3 deferLoad=True
519 df = data.get(parameters={"columns": self.columnscolumns})
520 elif isinstance(data, pd.DataFrame):
521 # input if Gen3 deferLoad=False
522 df = data
523 else:
524 # Original Gen2 input is type ParquetTable and the fallback
525 df = data.toDataFrame(columns=self.columnscolumns)
526
527 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
528
529 # Check that output columns are actually columns
530 for name, colVal in valDict.items():
531 if len(colVal.shape) != 1:
532 raise RuntimeError("Transformed column '%s' is not the shape of a column. "
533 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
534
535 try:
536 valDf = pd.concat(valDict, axis=1)
537 except TypeError:
538 print([(k, type(v)) for k, v in valDict.items()])
539 raise
540
541 if kwargs.get('dropna', False):
542 valDf = valDf.dropna(how='any')
543
544 return valDf
545
546 @classmethod
547 def renameCol(cls, col, renameRules):
548 if renameRules is None:
549 return col
550 for old, new in renameRules:
551 if col.startswith(old):
552 col = col.replace(old, new)
553 return col
554
    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML specification file.

        Parameters
        ----------
        filename : `str`
            Path to the YAML file; environment variables are expanded.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)
563
    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification.

        Parameters
        ----------
        translationDefinition : `dict`
            Parsed YAML containing a 'funcs' mapping, plus optional flag
            lists ('calexpFlags', 'refFlags', 'forcedFlags', 'flags') and
            optional 'flag_rename_rules'.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Each flag list becomes a plain Column functor on its dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
592
593
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger used to report a failed first evaluation attempt.
    """
    try:
        # NOTE(review): the extracted source contained invisible zero-width
        # characters inside these regex literals; restored to plain \( and \).
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the mag() argument needs the _instFlux suffix.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
615
616
618 """Arbitrary computation on a catalog
619
620 Column names (and thus the columns to be loaded from catalog) are found
621 by finding all words and trying to ignore all "math-y" words.
622
623 Parameters
624 ----------
625 expr : str
626 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
627 """
628 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
629
    def __init__(self, expr, **kwargs):
        # Expression to evaluate, parsed/executed by ``mag_aware_eval``.
        self.expr = expr
        super().__init__(**kwargs)
633
    @property
    def name(self):
        # The expression itself is the most descriptive label available.
        return self.expr
637
638 @property
639 def columns(self):
640 flux_cols = re.findall(r'mag\‍(\s*(\w+)\s*\‍)', self.expr)
641
642 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
643 not_a_col = []
644 for c in flux_cols:
645 if not re.search('_instFlux$', c):
646 cols.append(f'{c}_instFlux')
647 not_a_col.append(c)
648 else:
649 cols.append(c)
650
651 return list(set([c for c in cols if c not in not_a_col]))
652
    def _func(self, df):
        # Delegate to the module-level evaluator, which knows mag().
        return mag_aware_eval(df, self.expr, self.log)
655
656
658 """Get column with specified name
659 """
660
    def __init__(self, col, **kwargs):
        # Name of the single column to fetch.
        self.col = col
        super().__init__(**kwargs)
664
    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col
668
    @property
    def columns(self):
        # Exactly one column is needed.
        return [self.col]
672
    def _func(self, df):
        # Pass the column through unchanged.
        return df[self.col]
675
676
678 """Return the value of the index for each object
679 """
680
681 columns = ['coord_ra'] # just a dummy; something has to be here
682 _defaultDataset = 'ref'
683 _defaultNoDup = True
684
    def _func(self, df):
        # The object ID is the dataframe index; return it as a column.
        return pd.Series(df.index, index=df.index)
687
688
690 col = 'id'
691 _allow_difference = False
692 _defaultNoDup = True
693
    def _func(self, df):
        # 'id' is the dataframe index, not a stored column.
        return pd.Series(df.index, index=df.index)
696
697
699 col = 'base_Footprint_nPix'
700
701
703 """Base class for coordinate column, in degrees
704 """
705 _radians = True
706
    def __init__(self, col, **kwargs):
        # No extra state; defers entirely to Column.
        super().__init__(col, **kwargs)
709
710 def _func(self, df):
711 # Must not modify original column in case that column is used by another functor
712 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
713 return output
714
715
717 """Right Ascension, in degrees
718 """
719 name = 'RA'
720 _defaultNoDup = True
721
    def __init__(self, **kwargs):
        # Always reads the 'coord_ra' column.
        super().__init__('coord_ra', **kwargs)
724
    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override adds nothing beyond the base behavior;
        # kept for interface stability.
        return super().__call__(catalog, **kwargs)
727
728
730 """Declination, in degrees
731 """
732 name = 'Dec'
733 _defaultNoDup = True
734
    def __init__(self, **kwargs):
        # Always reads the 'coord_dec' column.
        super().__init__('coord_dec', **kwargs)
737
    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override adds nothing beyond the base behavior;
        # kept for interface stability.
        return super().__call__(catalog, **kwargs)
740
741
743 """Compute the level 20 HtmIndex for the catalog.
744
745 Notes
746 -----
747 This functor was implemented to satisfy requirements of old APDB interface
748 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
749 interface had migrated to not need that information, but we keep this
750 class in case it may be useful for something else.
751 """
752 name = "Htm20"
753 htmLevel = 20
754 _radians = True
755
756 def __init__(self, ra, decl, **kwargs):
758 self.ra = ra
759 self.decl = decl
760 self._columns = [self.ra, self.decl]
761 super().__init__(**kwargs)
762
    def _func(self, df):

        def computePixel(row):
            # Build a sphere point in the units the catalog stores
            # (radians when ``_radians`` is set, degrees otherwise).
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        # result_type='reduce' keeps the output a Series; cast to int64 for
        # a stable pixelId dtype.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
777
778
def fluxName(col):
    """Append '_instFlux' to ``col`` unless it is already a flux column name."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
783
784
def fluxErrName(col):
    """Append '_instFluxErr' to ``col`` unless it is already a flux-error column name."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
789
790
792 """Compute calibrated magnitude
793
794 Takes a `calib` argument, which returns the flux at mag=0
795 as `calib.getFluxMag0()`. If not provided, then the default
796 `fluxMag0` is 63095734448.0194, which is default for HSC.
797 This default should be removed in DM-21955
798
799 This calculation hides warnings about invalid values and dividing by zero.
800
801 As for all functors, a `dataset` and `filt` kwarg should be provided upon
802 initialization. Unlike the default `Functor`, however, the default dataset
803 for a `Mag` is `'meas'`, rather than `'ref'`.
804
805 Parameters
806 ----------
807 col : `str`
808 Name of flux column from which to compute magnitude. Can be parseable
809 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
810 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
811 understand.
812 calib : `lsst.afw.image.calib.Calib` (optional)
813 Object that knows zero point.
814 """
815 _defaultDataset = 'meas'
816
    def __init__(self, col, calib=None, **kwargs):
        # Normalize the column name to its _instFlux form.
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)
827
    @property
    def columns(self):
        # Only the (normalized) flux column is needed.
        return [self.col]
831
832 def _func(self, df):
833 with np.warnings.catch_warnings():
834 np.warnings.filterwarnings('ignore', r'invalid value encountered')
835 np.warnings.filterwarnings('ignore', r'divide by zero')
836 return -2.5*np.log10(df[self.col] / self.fluxMag0)
837
    @property
    def name(self):
        # e.g. 'mag_modelfit_CModel_instFlux'.
        return f'mag_{self.col}'
841
842
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            # No calibration available: assume a perfectly-known zero point.
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Both the flux and its uncertainty column are required.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # ``np.warnings`` was removed in NumPy 2.0; use the stdlib module.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # NOTE(review): extraction had garbled this unpack; it reads the
            # two column names from the ``columns`` property.
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
879
880
882 """
883 """
884
    def _func(self, df):
        # Flux relative to the mag-0 flux, scaled by 1e9 (nanomaggies).
        return (df[self.col] / self.fluxMag0) * 1e9
887
888
890 _defaultDataset = 'meas'
891
892 """Functor to calculate magnitude difference"""
893
    def __init__(self, col1, col2, **kwargs):
        # Normalize both column names to their _instFlux forms.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)
898
    @property
    def columns(self):
        # Both flux columns are needed for the difference.
        return [self.col1, self.col2]
902
903 def _func(self, df):
904 with np.warnings.catch_warnings():
905 np.warnings.filterwarnings('ignore', r'invalid value encountered')
906 np.warnings.filterwarnings('ignore', r'divide by zero')
907 return -2.5*np.log10(df[self.col1]/df[self.col2])
908
    @property
    def name(self):
        # Figure-label form of the difference.
        return f'(mag_{self.col1} - mag_{self.col2})'
912
    @property
    def shortname(self):
        # Compact form suitable for a column name or dict key.
        return f'magDiff_{self.col1}_{self.col2}'
916
917
919 """Compute the color between two filters
920
921 Computes color by initializing two different `Mag`
922 functors based on the `col` and filters provided, and
923 then returning the difference.
924
925 This is enabled by the `_func` expecting a dataframe with a
926 multilevel column index, with both `'band'` and `'column'`,
927 instead of just `'column'`, which is the `Functor` default.
928 This is controlled by the `_dfLevels` attribute.
929
930 Also of note, the default dataset for `Color` is `forced_src'`,
931 whereas for `Mag` it is `'meas'`.
932
933 Parameters
934 ----------
935 col : str
936 Name of flux column from which to compute; same as would be passed to
938
939 filt2, filt1 : str
940 Filters from which to compute magnitude difference.
941 Color computed is `Mag(filt2) - Mag(filt1)`.
942 """
943 _defaultDataset = 'forced_src'
944 _dfLevels = ('band', 'column')
945 _defaultNoDup = True
946
    def __init__(self, col, filt2, filt1, **kwargs):
        # Normalize the flux column name.
        self.col = fluxName(col)
        # A color of a band with itself is always zero and almost certainly
        # a configuration error.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; the difference is computed in _func.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)
958
    @property
    def filt(self):
        # A color spans two bands, so a single ``filt`` is meaningless.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single band.
        pass
966
    def _func(self, df):
        # The dataframe has a ('band', 'column') column index, so selecting
        # by band yields the single-level frame each Mag functor expects.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1
971
    @property
    def columns(self):
        # Same flux column in both bands (mag1.col == mag2.col == self.col).
        return [self.mag1.col, self.mag2.col]
975
    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples for the two bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]
978
979 @property
980 def name(self):
981 return f'{self.filt2} - {self.filt1} ({self.col})'
982
983 @property
984 def shortname(self):
985 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
986
987
989 """Main function of this subclass is to override the dropna=True
990 """
991 _null_label = 'null'
992 _allow_difference = False
993 name = 'label'
994 _force_str = False
995
996 def __call__(self, parq, dropna=False, **kwargs):
997 return super().__call__(parq, dropna=False, **kwargs)
998
999
class StarGalaxyLabeller(Labeller):
    """Label sources as 'star' or 'galaxy' based on the extendedness column.

    Null extendedness values map to the null label.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 0/1 from the 0.5 extendedness threshold; code 2 marks nulls.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
1018
1019
class NumStarLabeller(Labeller):
    """Label sources by how many bands flagged them as stars."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters (largest flag count minus one unique zero bin).
        n = len(x.unique()) - 1

        # NOTE(review): these bin labels differ from the class-level
        # ``labels`` mapping above — confirm which set is intended.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
1038
1039
class DeconvolvedMoments(Functor):
    """Compute deconvolved second moments: source moments minus PSF moments.

    Prefers HSM source moments, falling back to SDSS shape moments where the
    HSM values are not finite; PSF moments must come from HSM.

    Raises
    ------
    RuntimeError
        If no HSM PSF moment columns are present in the catalog.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use SDSS moments wherever the HSM moments are NaN/inf.
        return hsm.where(np.isfinite(hsm), sdss) - psf
1065
1066
1068 """Functor to calculate SDSS trace radius size for sources"""
1069 name = "SDSS Trace Size"
1070 shortname = 'sdssTrace'
1071 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1072
1073 def _func(self, df):
1074 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1075 return srcSize
1076
1077
1079 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1080 name = "PSF - SDSS Trace Size"
1081 shortname = 'psf_sdssTrace'
1082 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1083 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1084
1085 def _func(self, df):
1086 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1087 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1088 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1089 return sizeDiff
1090
1091
1093 """Functor to calculate HSM trace radius size for sources"""
1094 name = 'HSM Trace Size'
1095 shortname = 'hsmTrace'
1096 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1097 "ext_shapeHSM_HsmSourceMoments_yy")
1098
1099 def _func(self, df):
1100 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1101 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1102 return srcSize
1103
1104
1106 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1107 name = 'PSF - HSM Trace Size'
1108 shortname = 'psf_HsmTrace'
1109 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1110 "ext_shapeHSM_HsmSourceMoments_yy",
1111 "ext_shapeHSM_HsmPsfMoments_xx",
1112 "ext_shapeHSM_HsmPsfMoments_yy")
1113
1114 def _func(self, df):
1115 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1116 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1117 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1118 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1119 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1120 return sizeDiff
1121
1122
class HsmFwhm(Functor):
    """Compute the PSF FWHM in arcseconds from HSM PSF moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma-to-FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1133
1134
1136 name = "Distortion Ellipticity (e1)"
1137 shortname = "Distortion"
1138
1139 def __init__(self, colXX, colXY, colYY, **kwargs):
1140 self.colXX = colXX
1141 self.colXY = colXY
1142 self.colYY = colYY
1143 self._columns = [self.colXX, self.colXY, self.colYY]
1144 super().__init__(**kwargs)
1145
1146 @property
1147 def columns(self):
1148 return [self.colXX, self.colXY, self.colYY]
1149
1150 def _func(self, df):
1151 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY])
1152
1153
1155 name = "Ellipticity e2"
1156
1157 def __init__(self, colXX, colXY, colYY, **kwargs):
1158 self.colXX = colXX
1159 self.colXY = colXY
1160 self.colYY = colYY
1161 super().__init__(**kwargs)
1162
1163 @property
1164 def columns(self):
1165 return [self.colXX, self.colXY, self.colYY]
1166
1167 def _func(self, df):
1168 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1169
1170
1172
1173 def __init__(self, colXX, colXY, colYY, **kwargs):
1174 self.colXX = colXX
1175 self.colXY = colXY
1176 self.colYY = colYY
1177 super().__init__(**kwargs)
1178
1179 @property
1180 def columns(self):
1181 return [self.colXX, self.colXY, self.colYY]
1182
1183 def _func(self, df):
1184 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1185
1186
1188 """Computations using the stored localWcs.
1189 """
1190 name = "LocalWcsOperations"
1191
1192 def __init__(self,
1193 colCD_1_1,
1194 colCD_1_2,
1195 colCD_2_1,
1196 colCD_2_2,
1197 **kwargs):
1198 self.colCD_1_1 = colCD_1_1
1199 self.colCD_1_2 = colCD_1_2
1200 self.colCD_2_1 = colCD_2_1
1201 self.colCD_2_2 = colCD_2_2
1202 super().__init__(**kwargs)
1203
1204 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1205 """Compute the distance on the sphere from x2, y1 to x1, y1.
1206
1207 Parameters
1208 ----------
1209 x : `pandas.Series`
1210 X pixel coordinate.
1211 y : `pandas.Series`
1212 Y pixel coordinate.
1213 cd11 : `pandas.Series`
1214 [1, 1] element of the local Wcs affine transform.
1215 cd11 : `pandas.Series`
1216 [1, 1] element of the local Wcs affine transform.
1217 cd12 : `pandas.Series`
1218 [1, 2] element of the local Wcs affine transform.
1219 cd21 : `pandas.Series`
1220 [2, 1] element of the local Wcs affine transform.
1221 cd22 : `pandas.Series`
1222 [2, 2] element of the local Wcs affine transform.
1223
1224 Returns
1225 -------
1226 raDecTuple : tuple
1227 RA and dec conversion of x and y given the local Wcs. Returned
1228 units are in radians.
1229
1230 """
1231 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1232
1233 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1234 """Compute the local pixel scale conversion.
1235
1236 Parameters
1237 ----------
1238 ra1 : `pandas.Series`
1239 Ra of the first coordinate in radians.
1240 dec1 : `pandas.Series`
1241 Dec of the first coordinate in radians.
1242 ra2 : `pandas.Series`
1243 Ra of the second coordinate in radians.
1244 dec2 : `pandas.Series`
1245 Dec of the second coordinate in radians.
1246
1247 Returns
1248 -------
1249 dist : `pandas.Series`
1250 Distance on the sphere in radians.
1251 """
1252 deltaDec = dec2 - dec1
1253 deltaRa = ra2 - ra1
1254 return 2 * np.arcsin(
1255 np.sqrt(
1256 np.sin(deltaDec / 2) ** 2
1257 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1258
1259 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1260 """Compute the distance on the sphere from x2, y1 to x1, y1.
1261
1262 Parameters
1263 ----------
1264 x1 : `pandas.Series`
1265 X pixel coordinate.
1266 y1 : `pandas.Series`
1267 Y pixel coordinate.
1268 x2 : `pandas.Series`
1269 X pixel coordinate.
1270 y2 : `pandas.Series`
1271 Y pixel coordinate.
1272 cd11 : `pandas.Series`
1273 [1, 1] element of the local Wcs affine transform.
1274 cd11 : `pandas.Series`
1275 [1, 1] element of the local Wcs affine transform.
1276 cd12 : `pandas.Series`
1277 [1, 2] element of the local Wcs affine transform.
1278 cd21 : `pandas.Series`
1279 [2, 1] element of the local Wcs affine transform.
1280 cd22 : `pandas.Series`
1281 [2, 2] element of the local Wcs affine transform.
1282
1283 Returns
1284 -------
1285 Distance : `pandas.Series`
1286 Arcseconds per pixel at the location of the local WC
1287 """
1288 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1289 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1290 # Great circle distance for small separations.
1291 return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1292
1293
1295 """Compute the local pixel scale from the stored CDMatrix.
1296 """
1297 name = "PixelScale"
1298
1299 @property
1300 def columns(self):
1301 return [self.colCD_1_1,
1302 self.colCD_1_2,
1303 self.colCD_2_1,
1304 self.colCD_2_2]
1305
1306 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1307 """Compute the local pixel to scale conversion in arcseconds.
1308
1309 Parameters
1310 ----------
1311 cd11 : `pandas.Series`
1312 [1, 1] element of the local Wcs affine transform in radians.
1313 cd11 : `pandas.Series`
1314 [1, 1] element of the local Wcs affine transform in radians.
1315 cd12 : `pandas.Series`
1316 [1, 2] element of the local Wcs affine transform in radians.
1317 cd21 : `pandas.Series`
1318 [2, 1] element of the local Wcs affine transform in radians.
1319 cd22 : `pandas.Series`
1320 [2, 2] element of the local Wcs affine transform in radians.
1321
1322 Returns
1323 -------
1324 pixScale : `pandas.Series`
1325 Arcseconds per pixel at the location of the local WC
1326 """
1327 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1328
1329 def _func(self, df):
1330 return self.pixelScaleArcseconds(df[self.colCD_1_1],
1331 df[self.colCD_1_2],
1332 df[self.colCD_2_1],
1333 df[self.colCD_2_2])
1334
1335
1337 """Convert a value in units pixels squared to units arcseconds squared.
1338 """
1339
1340 def __init__(self,
1341 col,
1342 colCD_1_1,
1343 colCD_1_2,
1344 colCD_2_1,
1345 colCD_2_2,
1346 **kwargs):
1347 self.col = col
1348 super().__init__(colCD_1_1,
1349 colCD_1_2,
1350 colCD_2_1,
1351 colCD_2_2,
1352 **kwargs)
1353
1354 @property
1355 def name(self):
1356 return f"{self.col}_asArcseconds"
1357
1358 @property
1359 def columns(self):
1360 return [self.col,
1361 self.colCD_1_1,
1362 self.colCD_1_2,
1363 self.colCD_2_1,
1364 self.colCD_2_2]
1365
1366 def _func(self, df):
1367 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
1368 df[self.colCD_1_2],
1369 df[self.colCD_2_1],
1370 df[self.colCD_2_2])
1371
1372
1374 """Convert a value in units pixels to units arcseconds.
1375 """
1376
1377 def __init__(self,
1378 col,
1379 colCD_1_1,
1380 colCD_1_2,
1381 colCD_2_1,
1382 colCD_2_2,
1383 **kwargs):
1384 self.col = col
1385 super().__init__(colCD_1_1,
1386 colCD_1_2,
1387 colCD_2_1,
1388 colCD_2_2,
1389 **kwargs)
1390
1391 @property
1392 def name(self):
1393 return f"{self.col}_asArcsecondsSq"
1394
1395 @property
1396 def columns(self):
1397 return [self.col,
1398 self.colCD_1_1,
1399 self.colCD_1_2,
1400 self.colCD_2_1,
1401 self.colCD_2_2]
1402
1403 def _func(self, df):
1404 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
1405 df[self.colCD_1_2],
1406 df[self.colCD_2_1],
1407 df[self.colCD_2_2])
1408 return df[self.col] * pixScale * pixScale
1409
1410
class ReferenceBand(Functor):
    """Return the name of the band used as the reference measurement for
    each row, derived from the boolean merge_measurement_* columns."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        # (FIX: the scrape had the fused identifier ``self.columnscolumns``.)
        columns = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty
        return df[columns].apply(getFilterAliasName, axis=1,
                                 result_type='reduce').astype('object')
1436
1437
class Photometry(Functor):
    """Base class for photometric conversions from instrumental counts (DN)
    to calibrated fluxes (nJy) and AB magnitudes.

    Parameters
    ----------
    colFlux : str
        Name of the flux column to convert.
    colFluxErr : str, optional
        Name of the associated flux-error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if None, a
        hard-coded coadd zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        # Overflow-safe scalar hypotenuse (vectorized via self.vhypot).
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental counts to flux in nJy."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental counts to AB magnitude."""
        # FIX: use the stdlib ``warnings`` module; the ``np.warnings`` alias
        # was removed in NumPy 1.25.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to a flux error in nJy."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
1494
1495
class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys."""
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)
1499
1500
class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojanskys."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1509
1510
class Magnitude(Photometry):
    """Calibrated AB magnitude."""
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)
1514
1515
class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1524
1525
1527 """Base class for calibrating the specified instrument flux column using
1528 the local photometric calibration.
1529
1530 Parameters
1531 ----------
1532 instFluxCol : `str`
1533 Name of the instrument flux column.
1534 instFluxErrCol : `str`
1535 Name of the assocated error columns for ``instFluxCol``.
1536 photoCalibCol : `str`
1537 Name of local calibration column.
1538 photoCalibErrCol : `str`
1539 Error associated with ``photoCalibCol``
1540
1541 See also
1542 --------
1543 LocalPhotometry
1544 LocalNanojansky
1545 LocalNanojanskyErr
1546 LocalMagnitude
1547 LocalMagnitudeErr
1548 """
1549 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1550
1551 def __init__(self,
1552 instFluxCol,
1553 instFluxErrCol,
1554 photoCalibCol,
1555 photoCalibErrCol,
1556 **kwargs):
1557 self.instFluxCol = instFluxCol
1558 self.instFluxErrCol = instFluxErrCol
1559 self.photoCalibCol = photoCalibCol
1560 self.photoCalibErrCol = photoCalibErrCol
1561 super().__init__(**kwargs)
1562
1563 def instFluxToNanojansky(self, instFlux, localCalib):
1564 """Convert instrument flux to nanojanskys.
1565
1566 Parameters
1567 ----------
1568 instFlux : `numpy.ndarray` or `pandas.Series`
1569 Array of instrument flux measurements
1570 localCalib : `numpy.ndarray` or `pandas.Series`
1571 Array of local photometric calibration estimates.
1572
1573 Returns
1574 -------
1575 calibFlux : `numpy.ndarray` or `pandas.Series`
1576 Array of calibrated flux measurements.
1577 """
1578 return instFlux * localCalib
1579
1580 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1581 """Convert instrument flux to nanojanskys.
1582
1583 Parameters
1584 ----------
1585 instFlux : `numpy.ndarray` or `pandas.Series`
1586 Array of instrument flux measurements
1587 instFluxErr : `numpy.ndarray` or `pandas.Series`
1588 Errors on associated ``instFlux`` values
1589 localCalib : `numpy.ndarray` or `pandas.Series`
1590 Array of local photometric calibration estimates.
1591 localCalibErr : `numpy.ndarray` or `pandas.Series`
1592 Errors on associated ``localCalib`` values
1593
1594 Returns
1595 -------
1596 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1597 Errors on calibrated flux measurements.
1598 """
1599 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1600
1601 def instFluxToMagnitude(self, instFlux, localCalib):
1602 """Convert instrument flux to nanojanskys.
1603
1604 Parameters
1605 ----------
1606 instFlux : `numpy.ndarray` or `pandas.Series`
1607 Array of instrument flux measurements
1608 localCalib : `numpy.ndarray` or `pandas.Series`
1609 Array of local photometric calibration estimates.
1610
1611 Returns
1612 -------
1613 calibMag : `numpy.ndarray` or `pandas.Series`
1614 Array of calibrated AB magnitudes.
1615 """
1616 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1617
1618 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1619 """Convert instrument flux err to nanojanskys.
1620
1621 Parameters
1622 ----------
1623 instFlux : `numpy.ndarray` or `pandas.Series`
1624 Array of instrument flux measurements
1625 instFluxErr : `numpy.ndarray` or `pandas.Series`
1626 Errors on associated ``instFlux`` values
1627 localCalib : `numpy.ndarray` or `pandas.Series`
1628 Array of local photometric calibration estimates.
1629 localCalibErr : `numpy.ndarray` or `pandas.Series`
1630 Errors on associated ``localCalib`` values
1631
1632 Returns
1633 -------
1634 calibMagErr: `numpy.ndarray` or `pandas.Series`
1635 Error on calibrated AB magnitudes.
1636 """
1637 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1638 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1639
1640
1642 """Compute calibrated fluxes using the local calibration value.
1643
1644 See also
1645 --------
1646 LocalNanojansky
1647 LocalNanojanskyErr
1648 LocalMagnitude
1649 LocalMagnitudeErr
1650 """
1651
1652 @property
1653 def columns(self):
1654 return [self.instFluxCol, self.photoCalibCol]
1655
1656 @property
1657 def name(self):
1658 return f'flux_{self.instFluxCol}'
1659
1660 def _func(self, df):
1661 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1662
1663
1665 """Compute calibrated flux errors using the local calibration value.
1666
1667 See also
1668 --------
1669 LocalNanojansky
1670 LocalNanojanskyErr
1671 LocalMagnitude
1672 LocalMagnitudeErr
1673 """
1674
1675 @property
1676 def columns(self):
1677 return [self.instFluxCol, self.instFluxErrCol,
1679
1680 @property
1681 def name(self):
1682 return f'fluxErr_{self.instFluxCol}'
1683
1684 def _func(self, df):
1685 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1686 df[self.photoCalibCol], df[self.photoCalibErrCol])
1687
1688
1690 """Compute calibrated AB magnitudes using the local calibration value.
1691
1692 See also
1693 --------
1694 LocalNanojansky
1695 LocalNanojanskyErr
1696 LocalMagnitude
1697 LocalMagnitudeErr
1698 """
1699
1700 @property
1701 def columns(self):
1702 return [self.instFluxCol, self.photoCalibCol]
1703
1704 @property
1705 def name(self):
1706 return f'mag_{self.instFluxCol}'
1707
1708 def _func(self, df):
1709 return self.instFluxToMagnitude(df[self.instFluxCol],
1710 df[self.photoCalibCol])
1711
1712
1714 """Compute calibrated AB magnitude errors using the local calibration value.
1715
1716 See also
1717 --------
1718 LocalNanojansky
1719 LocalNanojanskyErr
1720 LocalMagnitude
1721 LocalMagnitudeErr
1722 """
1723
1724 @property
1725 def columns(self):
1726 return [self.instFluxCol, self.instFluxErrCol,
1728
1729 @property
1730 def name(self):
1731 return f'magErr_{self.instFluxCol}'
1732
1733 def _func(self, df):
1734 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1735 df[self.instFluxErrCol],
1736 df[self.photoCalibCol],
1737 df[self.photoCalibErrCol])
1738
1739
1741 """Compute absolute mean of dipole fluxes.
1742
1743 See also
1744 --------
1745 LocalNanojansky
1746 LocalNanojanskyErr
1747 LocalMagnitude
1748 LocalMagnitudeErr
1749 LocalDipoleMeanFlux
1750 LocalDipoleMeanFluxErr
1751 LocalDipoleDiffFlux
1752 LocalDipoleDiffFluxErr
1753 """
1754 def __init__(self,
1755 instFluxPosCol,
1756 instFluxNegCol,
1757 instFluxPosErrCol,
1758 instFluxNegErrCol,
1759 photoCalibCol,
1760 photoCalibErrCol,
1761 **kwargs):
1762 self.instFluxNegCol = instFluxNegCol
1763 self.instFluxPosCol = instFluxPosCol
1764 self.instFluxNegErrCol = instFluxNegErrCol
1765 self.instFluxPosErrCol = instFluxPosErrCol
1766 self.photoCalibColphotoCalibCol = photoCalibCol
1767 self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1768 super().__init__(instFluxNegCol,
1769 instFluxNegErrCol,
1770 photoCalibCol,
1771 photoCalibErrCol,
1772 **kwargs)
1773
class LocalDipoleMeanFlux(LocalDipolePhotometry):
    """Compute the absolute mean of dipole fluxes, 0.5*(|pos| + |neg|),
    in nanojanskys."""

    @property
    def columns(self):
        # NOTE(review): the scrape truncated this list; the calibration
        # column is required by _func below.
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                    + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1787
1788
1790 """Compute the error on the absolute mean of dipole fluxes.
1791
1792 See also
1793 --------
1794 LocalNanojansky
1795 LocalNanojanskyErr
1796 LocalMagnitude
1797 LocalMagnitudeErr
1798 LocalDipoleMeanFlux
1799 LocalDipoleMeanFluxErr
1800 LocalDipoleDiffFlux
1801 LocalDipoleDiffFluxErr
1802 """
1803
1804 @property
1805 def columns(self):
1806 return [self.instFluxPosCol,
1807 self.instFluxNegCol,
1808 self.instFluxPosErrCol,
1809 self.instFluxNegErrCol,
1812
1813 @property
1814 def name(self):
1815 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1816
1817 def _func(self, df):
1818 return 0.5*np.sqrt(
1819 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
1821 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
1822 * df[self.photoCalibColphotoCalibCol]**2)
1823
1824
1826 """Compute the absolute difference of dipole fluxes.
1827
1828 Value is (abs(pos) - abs(neg))
1829
1830 See also
1831 --------
1832 LocalNanojansky
1833 LocalNanojanskyErr
1834 LocalMagnitude
1835 LocalMagnitudeErr
1836 LocalDipoleMeanFlux
1837 LocalDipoleMeanFluxErr
1838 LocalDipoleDiffFlux
1839 LocalDipoleDiffFluxErr
1840 """
1841
1842 @property
1843 def columns(self):
1844 return [self.instFluxPosCol,
1845 self.instFluxNegCol,
1847
1848 @property
1849 def name(self):
1850 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1851
1852 def _func(self, df):
1853 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibColphotoCalibCol]))
1854 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibColphotoCalibCol])))
1855
1856
1858 """Compute the error on the absolute difference of dipole fluxes.
1859
1860 See also
1861 --------
1862 LocalNanojansky
1863 LocalNanojanskyErr
1864 LocalMagnitude
1865 LocalMagnitudeErr
1866 LocalDipoleMeanFlux
1867 LocalDipoleMeanFluxErr
1868 LocalDipoleDiffFlux
1869 LocalDipoleDiffFluxErr
1870 """
1871
1872 @property
1873 def columns(self):
1874 return [self.instFluxPosCol,
1875 self.instFluxNegCol,
1876 self.instFluxPosErrCol,
1877 self.instFluxNegErrCol,
1880
1881 @property
1882 def name(self):
1883 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1884
1885 def _func(self, df):
1886 return np.sqrt(
1887 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
1889 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
1890 * df[self.photoCalibColphotoCalibCol]**2)
1891
1892
1894 """Base class for returning the ratio of 2 columns.
1895
1896 Can be used to compute a Signal to Noise ratio for any input flux.
1897
1898 Parameters
1899 ----------
1900 numerator : `str`
1901 Name of the column to use at the numerator in the ratio
1902 denominator : `str`
1903 Name of the column to use as the denominator in the ratio.
1904 """
1905 def __init__(self,
1906 numerator,
1907 denominator,
1908 **kwargs):
1909 self.numerator = numerator
1910 self.denominator = denominator
1911 super().__init__(**kwargs)
1912
1913 @property
1914 def columns(self):
1915 return [self.numerator, self.denominator]
1916
1917 @property
1918 def name(self):
1919 return f'ratio_{self.numerator}_{self.denominator}'
1920
1921 def _func(self, df):
1922 with np.warnings.catch_warnings():
1923 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1924 np.warnings.filterwarnings('ignore', r'divide by zero')
1925 return df[self.numerator] / df[self.denominator]
1926
1927
1929 """Compute E(B-V) from dustmaps.sfd
1930 """
1931 _defaultDataset = 'ref'
1932 name = "E(B-V)"
1933 shortname = "ebv"
1934
1935 def __init__(self, **kwargs):
1936 self._columns = ['coord_ra', 'coord_dec']
1937 self.sfd = SFDQuery()
1938 super().__init__(**kwargs)
1939
1940 def _func(self, df):
1941 coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
1942 ebv = self.sfd(coords)
1943 # Double precision unnecessary scientifically
1944 # but currently needed for ingest to qserv
1945 return pd.Series(ebv, index=df.index).astype('float64')
table::Key< std::string > name
Definition: Amplifier.cc:116
std::vector< SchemaItem< Flag > > * items
table::Key< int > type
Definition: Detector.cc:163
Point in an unspecified spherical coordinate system.
Definition: SpherePoint.h:57
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:976
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:947
def __init__(self, col, **kwargs)
Definition: functors.py:661
def __init__(self, funcs, **kwargs)
Definition: functors.py:422
def __call__(self, data, **kwargs)
Definition: functors.py:474
def from_file(cls, filename, **kwargs)
Definition: functors.py:556
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:565
def renameCol(cls, col, renameRules)
Definition: functors.py:547
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:460
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1306
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1383
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1346
def __init__(self, col, **kwargs)
Definition: functors.py:707
def __init__(self, expr, **kwargs)
Definition: functors.py:630
def __init__(self, **kwargs)
Definition: functors.py:735
def __call__(self, catalog, **kwargs)
Definition: functors.py:738
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1139
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1157
def __init__(self, **kwargs)
Definition: functors.py:1935
def __call__(self, data, dropna=False)
Definition: functors.py:358
def _func(self, df, dropna=True)
Definition: functors.py:297
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:247
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:203
def difference(self, data1, data2, **kwargs)
Definition: functors.py:370
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:158
def _get_columnIndex(self, data)
Definition: functors.py:300
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:225
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:179
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:756
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:996
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1761
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1563
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1618
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1556
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1580
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1601
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1197
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1204
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1233
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1259
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:894
def __init__(self, *args, **kwargs)
Definition: functors.py:855
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:817
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1480
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1477
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1486
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1491
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1446
def __call__(self, catalog, **kwargs)
Definition: functors.py:725
def __init__(self, **kwargs)
Definition: functors.py:722
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1173
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1908
HtmPixelization provides HTM indexing of points and regions.
daf::base::PropertyList * list
Definition: fits.cc:928
daf::base::PropertySet * set
Definition: fits.cc:927
def mag_aware_eval(df, expr, log)
Definition: functors.py:594
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:56