LSST Applications  21.0.0-172-gfb10e10a+18fedfabac,22.0.0+297cba6710,22.0.0+80564b0ff1,22.0.0+8d77f4f51a,22.0.0+a28f4c53b1,22.0.0+dcf3732eb2,22.0.1-1-g7d6de66+2a20fdde0d,22.0.1-1-g8e32f31+297cba6710,22.0.1-1-geca5380+7fa3b7d9b6,22.0.1-12-g44dc1dc+2a20fdde0d,22.0.1-15-g6a90155+515f58c32b,22.0.1-16-g9282f48+790f5f2caa,22.0.1-2-g92698f7+dcf3732eb2,22.0.1-2-ga9b0f51+7fa3b7d9b6,22.0.1-2-gd1925c9+bf4f0e694f,22.0.1-24-g1ad7a390+a9625a72a8,22.0.1-25-g5bf6245+3ad8ecd50b,22.0.1-25-gb120d7b+8b5510f75f,22.0.1-27-g97737f7+2a20fdde0d,22.0.1-32-gf62ce7b1+aa4237961e,22.0.1-4-g0b3f228+2a20fdde0d,22.0.1-4-g243d05b+871c1b8305,22.0.1-4-g3a563be+32dcf1063f,22.0.1-4-g44f2e3d+9e4ab0f4fa,22.0.1-42-gca6935d93+ba5e5ca3eb,22.0.1-5-g15c806e+85460ae5f3,22.0.1-5-g58711c4+611d128589,22.0.1-5-g75bb458+99c117b92f,22.0.1-6-g1c63a23+7fa3b7d9b6,22.0.1-6-g50866e6+84ff5a128b,22.0.1-6-g8d3140d+720564cf76,22.0.1-6-gd805d02+cc5644f571,22.0.1-8-ge5750ce+85460ae5f3,master-g6e05de7fdc+babf819c66,master-g99da0e417a+8d77f4f51a,w.2021.48
LSST Data Management Base Package
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
lsst.pipe.tasks.functors.CompositeFunctor Class Reference
Inheritance diagram for lsst.pipe.tasks.functors.CompositeFunctor:
lsst.pipe.tasks.functors.Functor

Public Member Functions

def __init__ (self, funcs, **kwargs)
 
def filt (self)
 
def filt (self, filt)
 
def update (self, new)
 
def columns (self)
 
def multilevelColumns (self, data, **kwargs)
 
def __call__ (self, data, **kwargs)
 
def renameCol (cls, col, renameRules)
 
def from_file (cls, filename, **kwargs)
 
def from_yaml (cls, translationDefinition, **kwargs)
 
def noDup (self)
 
def multilevelColumns (self, data, columnIndex=None, returnTuple=False)
 
def __call__ (self, data, dropna=False)
 
def difference (self, data1, data2, **kwargs)
 
def fail (self, df)
 
def name (self)
 
def shortname (self)
 

Public Attributes

 funcDict
 
 filt
 

Static Public Attributes

 dataset = None
 

Detailed Description

Perform multiple calculations at once on a catalog

The role of a `CompositeFunctor` is to group together computations from
multiple functors.  Instead of returning a `pandas.Series`, a
`CompositeFunctor` returns a `pandas.DataFrame`, with the column names
being the keys of `funcDict`.

The `columns` attribute of a `CompositeFunctor` is the union of all columns
in all the component functors.

A `CompositeFunctor` does not use a `_func` method itself; rather,
when a `CompositeFunctor` is called, all its columns are loaded
at once, and the resulting dataframe is passed to the `_func` method of each component
functor.  This has the advantage of only doing I/O (reading from parquet file) once,
and works because each individual `_func` method of each component functor does not
care if there are *extra* columns in the dataframe being passed; only that it must contain
*at least* the `columns` it expects.

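An important and useful class method is `from_file`, which takes as argument the path to a YAML
file specifying a collection of functors; it loads the file and passes the parsed definition to
`from_yaml`, which builds the `CompositeFunctor` from that already-loaded definition.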

Parameters
----------
funcs : `dict` or `list`
    Dictionary or list of functors.  If a list, then it will be converted
    into a dictionary keyed by the `.shortname` attribute of each functor.

Definition at line 372 of file functors.py.

Constructor & Destructor Documentation

◆ __init__()

def lsst.pipe.tasks.functors.CompositeFunctor.__init__ (   self,
  funcs,
**  kwargs 
)

Definition at line 403 of file functors.py.

403  def __init__(self, funcs, **kwargs):
404 
405  if type(funcs) == dict:
406  self.funcDict = funcs
407  else:
408  self.funcDict = {f.shortname: f for f in funcs}
409 
410  self._filt = None
411 
412  super().__init__(**kwargs)
413 
table::Key< int > type
Definition: Detector.cc:163

Member Function Documentation

◆ __call__() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.__call__ (   self,
  data,
**  kwargs 
)
Apply the functor to the data table

Parameters
----------
data : `lsst.daf.butler.DeferredDatasetHandle`,
       `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
       `lsst.pipe.tasks.parquetTable.ParquetTable`,
       or `pandas.DataFrame`.
    The table or a pointer to a table on disk from which columns can
    be accessed

Definition at line 455 of file functors.py.

455  def __call__(self, data, **kwargs):
456  """Apply the functor to the data table
457 
458  Parameters
459  ----------
460  data : `lsst.daf.butler.DeferredDatasetHandle`,
461  `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
462  `lsst.pipe.tasks.parquetTable.ParquetTable`,
463  or `pandas.DataFrame`.
464  The table or a pointer to a table on disk from which columns can
465  be accessed
466  """
467  columnIndex = self._get_columnIndex(data)
468 
469  # First, determine whether data has a multilevel index (either gen2 or gen3)
470  is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
471 
472  # Multilevel index, gen2 or gen3
473  if is_multiLevel:
474  columns = self.multilevelColumns(data, columnIndex=columnIndex)
475 
476  if isinstance(data, MultilevelParquetTable):
477  # Read data into memory the gen2 way
478  df = data.toDataFrame(columns=columns, droplevels=False)
479  elif isinstance(data, DeferredDatasetHandle):
480  # Read data into memory the gen3 way
481  df = data.get(parameters={"columns": columns})
482 
483  valDict = {}
484  for k, f in self.funcDict.items():
485  try:
486  subdf = f._setLevels(
487  df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
488  )
489  valDict[k] = f._func(subdf)
490  except Exception as e:
491  try:
492  valDict[k] = f.fail(subdf)
493  except NameError:
494  raise e
495 
496  else:
497  if isinstance(data, DeferredDatasetHandle):
498  # input if Gen3 deferLoad=True
499  df = data.get(parameters={"columns": self.columns})
500  elif isinstance(data, pd.DataFrame):
501  # input if Gen3 deferLoad=False
502  df = data
503  else:
504  # Original Gen2 input is type ParquetTable and the fallback
505  df = data.toDataFrame(columns=self.columns)
506 
507  valDict = {k: f._func(df) for k, f in self.funcDict.items()}
508 
509  # Check that output columns are actually columns
510  for name, colVal in valDict.items():
511  if len(colVal.shape) != 1:
512  raise RuntimeError("Transformed column '%s' is not the shape of a column. "
513  "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
514 
515  try:
516  valDf = pd.concat(valDict, axis=1)
517  except TypeError:
518  print([(k, type(v)) for k, v in valDict.items()])
519  raise
520 
521  if kwargs.get('dropna', False):
522  valDf = valDf.dropna(how='any')
523 
524  return valDf
525 
std::vector< SchemaItem< Flag > > * items

◆ __call__() [2/2]

def lsst.pipe.tasks.functors.Functor.__call__ (   self,
  data,
  dropna = False 
)
inherited

Definition at line 340 of file functors.py.

340  def __call__(self, data, dropna=False):
341  try:
342  df = self._get_data(data)
343  vals = self._func(df)
344  except Exception:
345  vals = self.fail(df)
346  if dropna:
347  vals = self._dropna(vals)
348 
349  return vals
350 

◆ columns()

def lsst.pipe.tasks.functors.CompositeFunctor.columns (   self)
Columns required to perform calculation

Reimplemented from lsst.pipe.tasks.functors.Functor.

Definition at line 438 of file functors.py.

438  def columns(self):
439  return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
440 
daf::base::PropertyList * list
Definition: fits.cc:913
daf::base::PropertySet * set
Definition: fits.cc:912

◆ difference()

def lsst.pipe.tasks.functors.Functor.difference (   self,
  data1,
  data2,
**  kwargs 
)
inherited
Computes difference between functor called on two different ParquetTable objects

Definition at line 351 of file functors.py.

351  def difference(self, data1, data2, **kwargs):
352  """Computes difference between functor called on two different ParquetTable objects
353  """
354  return self(data1, **kwargs) - self(data2, **kwargs)
355 

◆ fail()

def lsst.pipe.tasks.functors.Functor.fail (   self,
  df 
)
inherited

Definition at line 356 of file functors.py.

356  def fail(self, df):
357  return pd.Series(np.full(len(df), np.nan), index=df.index)
358 

◆ filt() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.filt (   self)

Definition at line 415 of file functors.py.

415  def filt(self):
416  return self._filt
417 

◆ filt() [2/2]

def lsst.pipe.tasks.functors.CompositeFunctor.filt (   self,
  filt 
)

Definition at line 419 of file functors.py.

419  def filt(self, filt):
420  if filt is not None:
421  for _, f in self.funcDict.items():
422  f.filt = filt
423  self._filt = filt
424 

◆ from_file()

def lsst.pipe.tasks.functors.CompositeFunctor.from_file (   cls,
  filename,
**  kwargs 
)

Definition at line 536 of file functors.py.

536  def from_file(cls, filename, **kwargs):
537  # Allow environment variables in the filename.
538  filename = os.path.expandvars(filename)
539  with open(filename) as f:
540  translationDefinition = yaml.safe_load(f)
541 
542  return cls.from_yaml(translationDefinition, **kwargs)
543 

◆ from_yaml()

def lsst.pipe.tasks.functors.CompositeFunctor.from_yaml (   cls,
  translationDefinition,
**  kwargs 
)

Definition at line 545 of file functors.py.

545  def from_yaml(cls, translationDefinition, **kwargs):
546  funcs = {}
547  for func, val in translationDefinition['funcs'].items():
548  funcs[func] = init_fromDict(val, name=func)
549 
550  if 'flag_rename_rules' in translationDefinition:
551  renameRules = translationDefinition['flag_rename_rules']
552  else:
553  renameRules = None
554 
555  if 'calexpFlags' in translationDefinition:
556  for flag in translationDefinition['calexpFlags']:
557  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
558 
559  if 'refFlags' in translationDefinition:
560  for flag in translationDefinition['refFlags']:
561  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
562 
563  if 'forcedFlags' in translationDefinition:
564  for flag in translationDefinition['forcedFlags']:
565  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
566 
567  if 'flags' in translationDefinition:
568  for flag in translationDefinition['flags']:
569  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
570 
571  return cls(funcs, **kwargs)
572 
573 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:40

◆ multilevelColumns() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.multilevelColumns (   self,
  data,
**  kwargs 
)

Definition at line 441 of file functors.py.

441  def multilevelColumns(self, data, **kwargs):
442  # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
443  return list(
444  set(
445  [
446  x
447  for y in [
448  f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
449  ]
450  for x in y
451  ]
452  )
453  )
454 

◆ multilevelColumns() [2/2]

def lsst.pipe.tasks.functors.Functor.multilevelColumns (   self,
  data,
  columnIndex = None,
  returnTuple = False 
)
inherited
Returns columns needed by functor from multilevel dataset

To access tables with multilevel column structure, the `MultilevelParquetTable`
or `DeferredDatasetHandle` need to be passed either a list of tuples or a
dictionary.

Parameters
----------
data : `MultilevelParquetTable` or `DeferredDatasetHandle`

columnIndex (optional): pandas `Index` object
    either passed or read in from `DeferredDatasetHandle`.

`returnTuple` : bool
    If true, then return a list of tuples rather than the column dictionary
    specification.  This is set to `True` by `CompositeFunctor` in order to be able to
    combine columns from the various component functors.

Definition at line 229 of file functors.py.

229  def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
230  """Returns columns needed by functor from multilevel dataset
231 
232  To access tables with multilevel column structure, the `MultilevelParquetTable`
233  or `DeferredDatasetHandle` need to be passed either a list of tuples or a
234  dictionary.
235 
236  Parameters
237  ----------
238  data : `MultilevelParquetTable` or `DeferredDatasetHandle`
239 
240  columnIndex (optional): pandas `Index` object
241  either passed or read in from `DeferredDatasetHandle`.
242 
243  `returnTuple` : bool
244  If true, then return a list of tuples rather than the column dictionary
245  specification. This is set to `True` by `CompositeFunctor` in order to be able to
246  combine columns from the various component functors.
247 
248  """
249  if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
250  columnIndex = data.get(component="columns")
251 
252  # Confirm that the dataset has the column levels the functor is expecting it to have.
253  columnLevels = self._get_data_columnLevels(data, columnIndex)
254 
255  columnDict = {'column': self.columns,
256  'dataset': self.dataset}
257  if self.filt is None:
258  columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
259  if "band" in columnLevels:
260  if self.dataset == "ref":
261  columnDict["band"] = columnLevelNames["band"][0]
262  else:
263  raise ValueError(f"'filt' not set for functor {self.name}"
264  f"(dataset {self.dataset}) "
265  "and ParquetTable "
266  "contains multiple filters in column index. "
267  "Set 'filt' or set 'dataset' to 'ref'.")
268  else:
269  columnDict['band'] = self.filt
270 
271  if isinstance(data, MultilevelParquetTable):
272  return data._colsFromDict(columnDict)
273  elif isinstance(data, DeferredDatasetHandle):
274  if returnTuple:
275  return self._colsFromDict(columnDict, columnIndex=columnIndex)
276  else:
277  return columnDict
278 

◆ name()

def lsst.pipe.tasks.functors.Functor.name (   self)
inherited

◆ noDup()

def lsst.pipe.tasks.functors.Functor.noDup (   self)
inherited

Definition at line 148 of file functors.py.

148  def noDup(self):
149  if self._noDup is not None:
150  return self._noDup
151  else:
152  return self._defaultNoDup
153 

◆ renameCol()

def lsst.pipe.tasks.functors.CompositeFunctor.renameCol (   cls,
  col,
  renameRules 
)

Definition at line 527 of file functors.py.

527  def renameCol(cls, col, renameRules):
528  if renameRules is None:
529  return col
530  for old, new in renameRules:
531  if col.startswith(old):
532  col = col.replace(old, new)
533  return col
534 

◆ shortname()

def lsst.pipe.tasks.functors.Functor.shortname (   self)
inherited
Short name of functor (suitable for column name/dict key)

Reimplemented in lsst.pipe.tasks.functors.Color, and lsst.pipe.tasks.functors.MagDiff.

Definition at line 366 of file functors.py.

366  def shortname(self):
367  """Short name of functor (suitable for column name/dict key)
368  """
369  return self.name
370 
371 

◆ update()

def lsst.pipe.tasks.functors.CompositeFunctor.update (   self,
  new 
)

Definition at line 425 of file functors.py.

425  def update(self, new):
426  if isinstance(new, dict):
427  self.funcDict.update(new)
428  elif isinstance(new, CompositeFunctor):
429  self.funcDict.update(new.funcDict)
430  else:
431  raise TypeError('Can only update with dictionary or CompositeFunctor.')
432 
433  # Make sure new functors have the same 'filt' set
434  if self.filt is not None:
435  self.filt = self.filt
436 

Member Data Documentation

◆ dataset

lsst.pipe.tasks.functors.CompositeFunctor.dataset = None
static

Definition at line 401 of file functors.py.

◆ filt

lsst.pipe.tasks.functors.CompositeFunctor.filt

Definition at line 435 of file functors.py.

◆ funcDict

lsst.pipe.tasks.functors.CompositeFunctor.funcDict

Definition at line 406 of file functors.py.


The documentation for this class was generated from the following file: