LSST Applications  21.0.0-147-g0e635eb1+1acddb5be5,22.0.0+052faf71bd,22.0.0+1ea9a8b2b2,22.0.0+6312710a6c,22.0.0+729191ecac,22.0.0+7589c3a021,22.0.0+9f079a9461,22.0.1-1-g7d6de66+b8044ec9de,22.0.1-1-g87000a6+536b1ee016,22.0.1-1-g8e32f31+6312710a6c,22.0.1-10-gd060f87+016f7cdc03,22.0.1-12-g9c3108e+df145f6f68,22.0.1-16-g314fa6d+c825727ab8,22.0.1-19-g93a5c75+d23f2fb6d8,22.0.1-19-gb93eaa13+aab3ef7709,22.0.1-2-g8ef0a89+b8044ec9de,22.0.1-2-g92698f7+9f079a9461,22.0.1-2-ga9b0f51+052faf71bd,22.0.1-2-gac51dbf+052faf71bd,22.0.1-2-gb66926d+6312710a6c,22.0.1-2-gcb770ba+09e3807989,22.0.1-20-g32debb5+b8044ec9de,22.0.1-23-gc2439a9a+fb0756638e,22.0.1-3-g496fd5d+09117f784f,22.0.1-3-g59f966b+1e6ba2c031,22.0.1-3-g849a1b8+f8b568069f,22.0.1-3-gaaec9c0+c5c846a8b1,22.0.1-32-g5ddfab5d3+60ce4897b0,22.0.1-4-g037fbe1+64e601228d,22.0.1-4-g8623105+b8044ec9de,22.0.1-5-g096abc9+d18c45d440,22.0.1-5-g15c806e+57f5c03693,22.0.1-7-gba73697+57f5c03693,master-g6e05de7fdc+c1283a92b8,master-g72cdda8301+729191ecac,w.2021.39
LSST Data Management Base Package
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
lsst.pipe.tasks.functors.CompositeFunctor Class Reference
Inheritance diagram for lsst.pipe.tasks.functors.CompositeFunctor:
lsst.pipe.tasks.functors.Functor

Public Member Functions

def __init__ (self, funcs, **kwargs)
 
def filt (self)
 
def filt (self, filt)
 
def update (self, new)
 
def columns (self)
 
def multilevelColumns (self, data, **kwargs)
 
def __call__ (self, data, **kwargs)
 
def renameCol (cls, col, renameRules)
 
def from_file (cls, filename, **kwargs)
 
def from_yaml (cls, translationDefinition, **kwargs)
 
def noDup (self)
 
def multilevelColumns (self, data, columnIndex=None, returnTuple=False)
 
def __call__ (self, data, dropna=False)
 
def difference (self, data1, data2, **kwargs)
 
def fail (self, df)
 
def name (self)
 
def shortname (self)
 

Public Attributes

 funcDict
 
 filt
 

Static Public Attributes

 dataset = None
 

Detailed Description

Perform multiple calculations at once on a catalog

The role of a `CompositeFunctor` is to group together computations from
multiple functors.  Instead of returning a `pandas.Series`, a
`CompositeFunctor` returns a `pandas.DataFrame`, with the column names
being the keys of `funcDict`.

The `columns` attribute of a `CompositeFunctor` is the union of all columns
in all the component functors.

A `CompositeFunctor` does not use a `_func` method itself; rather,
when a `CompositeFunctor` is called, all its columns are loaded
at once, and the resulting dataframe is passed to the `_func` method of each component
functor.  This has the advantage of only doing I/O (reading from parquet file) once,
and works because each individual `_func` method of each component functor does not
care if there are *extra* columns in the dataframe being passed; only that it must contain
*at least* the `columns` it expects.

An important and useful class method is `from_file`, which takes as argument the path to a YAML
file specifying a collection of functors, loads it, and passes the loaded definition to `from_yaml`.

Parameters
----------
funcs : `dict` or `list`
    Dictionary or list of functors.  If a list, then it will be converted
    into a dictionary according to the `.shortname` attribute of each functor.

Definition at line 372 of file functors.py.

Constructor & Destructor Documentation

◆ __init__()

def lsst.pipe.tasks.functors.CompositeFunctor.__init__ (   self,
  funcs,
**  kwargs 
)

Definition at line 403 of file functors.py.

403  def __init__(self, funcs, **kwargs):
404 
405  if type(funcs) == dict:
406  self.funcDict = funcs
407  else:
408  self.funcDict = {f.shortname: f for f in funcs}
409 
410  self._filt = None
411 
412  super().__init__(**kwargs)
413 
table::Key< int > type
Definition: Detector.cc:163

Member Function Documentation

◆ __call__() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.__call__ (   self,
  data,
**  kwargs 
)
Apply the functor to the data table

Parameters
----------
data : `lsst.daf.butler.DeferredDatasetHandle`,
       `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
       `lsst.pipe.tasks.parquetTable.ParquetTable`,
       or `pandas.DataFrame`.
    The table or a pointer to a table on disk from which columns can
    be accessed

Definition at line 455 of file functors.py.

455  def __call__(self, data, **kwargs):
456  """Apply the functor to the data table
457 
458  Parameters
459  ----------
460  data : `lsst.daf.butler.DeferredDatasetHandle`,
461  `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
462  `lsst.pipe.tasks.parquetTable.ParquetTable`,
463  or `pandas.DataFrame`.
464  The table or a pointer to a table on disk from which columns can
465  be accessed
466  """
467  columnIndex = self._get_columnIndex(data)
468 
469  # First, determine whether data has a multilevel index (either gen2 or gen3)
470  is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
471 
472  # Multilevel index, gen2 or gen3
473  if is_multiLevel:
474  columns = self.multilevelColumns(data, columnIndex=columnIndex)
475 
476  if isinstance(data, MultilevelParquetTable):
477  # Read data into memory the gen2 way
478  df = data.toDataFrame(columns=columns, droplevels=False)
479  elif isinstance(data, DeferredDatasetHandle):
480  # Read data into memory the gen3 way
481  df = data.get(parameters={"columns": columns})
482 
483  valDict = {}
484  for k, f in self.funcDict.items():
485  try:
486  subdf = f._setLevels(
487  df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
488  )
489  valDict[k] = f._func(subdf)
490  except Exception as e:
491  try:
492  valDict[k] = f.fail(subdf)
493  except NameError:
494  raise e
495 
496  else:
497  if isinstance(data, DeferredDatasetHandle):
498  # input if Gen3 deferLoad=True
499  df = data.get(parameters={"columns": self.columns})
500  elif isinstance(data, pd.DataFrame):
501  # input if Gen3 deferLoad=False
502  df = data
503  else:
504  # Original Gen2 input is type ParquetTable and the fallback
505  df = data.toDataFrame(columns=self.columns)
506 
507  valDict = {k: f._func(df) for k, f in self.funcDict.items()}
508 
509  try:
510  valDf = pd.concat(valDict, axis=1)
511  except TypeError:
512  print([(k, type(v)) for k, v in valDict.items()])
513  raise
514 
515  if kwargs.get('dropna', False):
516  valDf = valDf.dropna(how='any')
517 
518  return valDf
519 
std::vector< SchemaItem< Flag > > * items

◆ __call__() [2/2]

def lsst.pipe.tasks.functors.Functor.__call__ (   self,
  data,
  dropna = False 
)
inherited

Definition at line 340 of file functors.py.

340  def __call__(self, data, dropna=False):
341  try:
342  df = self._get_data(data)
343  vals = self._func(df)
344  except Exception:
345  vals = self.fail(df)
346  if dropna:
347  vals = self._dropna(vals)
348 
349  return vals
350 

◆ columns()

def lsst.pipe.tasks.functors.CompositeFunctor.columns (   self)
Columns required to perform calculation

Reimplemented from lsst.pipe.tasks.functors.Functor.

Definition at line 438 of file functors.py.

438  def columns(self):
439  return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
440 
daf::base::PropertyList * list
Definition: fits.cc:913
daf::base::PropertySet * set
Definition: fits.cc:912

◆ difference()

def lsst.pipe.tasks.functors.Functor.difference (   self,
  data1,
  data2,
**  kwargs 
)
inherited
Computes difference between functor called on two different ParquetTable objects

Definition at line 351 of file functors.py.

351  def difference(self, data1, data2, **kwargs):
352  """Computes difference between functor called on two different ParquetTable objects
353  """
354  return self(data1, **kwargs) - self(data2, **kwargs)
355 

◆ fail()

def lsst.pipe.tasks.functors.Functor.fail (   self,
  df 
)
inherited

Definition at line 356 of file functors.py.

356  def fail(self, df):
357  return pd.Series(np.full(len(df), np.nan), index=df.index)
358 

◆ filt() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.filt (   self)

Definition at line 415 of file functors.py.

415  def filt(self):
416  return self._filt
417 

◆ filt() [2/2]

def lsst.pipe.tasks.functors.CompositeFunctor.filt (   self,
  filt 
)

Definition at line 419 of file functors.py.

419  def filt(self, filt):
420  if filt is not None:
421  for _, f in self.funcDict.items():
422  f.filt = filt
423  self._filt = filt
424 

◆ from_file()

def lsst.pipe.tasks.functors.CompositeFunctor.from_file (   cls,
  filename,
**  kwargs 
)

Definition at line 530 of file functors.py.

530  def from_file(cls, filename, **kwargs):
531  # Allow environment variables in the filename.
532  filename = os.path.expandvars(filename)
533  with open(filename) as f:
534  translationDefinition = yaml.safe_load(f)
535 
536  return cls.from_yaml(translationDefinition, **kwargs)
537 

◆ from_yaml()

def lsst.pipe.tasks.functors.CompositeFunctor.from_yaml (   cls,
  translationDefinition,
**  kwargs 
)

Definition at line 539 of file functors.py.

539  def from_yaml(cls, translationDefinition, **kwargs):
540  funcs = {}
541  for func, val in translationDefinition['funcs'].items():
542  funcs[func] = init_fromDict(val, name=func)
543 
544  if 'flag_rename_rules' in translationDefinition:
545  renameRules = translationDefinition['flag_rename_rules']
546  else:
547  renameRules = None
548 
549  if 'calexpFlags' in translationDefinition:
550  for flag in translationDefinition['calexpFlags']:
551  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
552 
553  if 'refFlags' in translationDefinition:
554  for flag in translationDefinition['refFlags']:
555  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
556 
557  if 'forcedFlags' in translationDefinition:
558  for flag in translationDefinition['forcedFlags']:
559  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
560 
561  if 'flags' in translationDefinition:
562  for flag in translationDefinition['flags']:
563  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
564 
565  return cls(funcs, **kwargs)
566 
567 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:40

◆ multilevelColumns() [1/2]

def lsst.pipe.tasks.functors.CompositeFunctor.multilevelColumns (   self,
  data,
**  kwargs 
)

Definition at line 441 of file functors.py.

441  def multilevelColumns(self, data, **kwargs):
442  # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
443  return list(
444  set(
445  [
446  x
447  for y in [
448  f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
449  ]
450  for x in y
451  ]
452  )
453  )
454 

◆ multilevelColumns() [2/2]

def lsst.pipe.tasks.functors.Functor.multilevelColumns (   self,
  data,
  columnIndex = None,
  returnTuple = False 
)
inherited
Returns columns needed by functor from multilevel dataset

To access tables with multilevel column structure, the `MultilevelParquetTable`
or `DeferredDatasetHandle` need to be passed either a list of tuples or a
dictionary.

Parameters
----------
data : `MultilevelParquetTable` or `DeferredDatasetHandle`

columnIndex (optional): pandas `Index` object
    either passed or read in from `DeferredDatasetHandle`.

`returnTuple` : bool
    If true, then return a list of tuples rather than the column dictionary
    specification.  This is set to `True` by `CompositeFunctor` in order to be able to
    combine columns from the various component functors.

Definition at line 229 of file functors.py.

229  def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
230  """Returns columns needed by functor from multilevel dataset
231 
232  To access tables with multilevel column structure, the `MultilevelParquetTable`
233  or `DeferredDatasetHandle` need to be passed either a list of tuples or a
234  dictionary.
235 
236  Parameters
237  ----------
238  data : `MultilevelParquetTable` or `DeferredDatasetHandle`
239 
240  columnIndex (optional): pandas `Index` object
241  either passed or read in from `DeferredDatasetHandle`.
242 
243  `returnTuple` : bool
244  If true, then return a list of tuples rather than the column dictionary
245  specification. This is set to `True` by `CompositeFunctor` in order to be able to
246  combine columns from the various component functors.
247 
248  """
249  if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
250  columnIndex = data.get(component="columns")
251 
252  # Confirm that the dataset has the column levels the functor is expecting it to have.
253  columnLevels = self._get_data_columnLevels(data, columnIndex)
254 
255  columnDict = {'column': self.columns,
256  'dataset': self.dataset}
257  if self.filt is None:
258  columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
259  if "band" in columnLevels:
260  if self.dataset == "ref":
261  columnDict["band"] = columnLevelNames["band"][0]
262  else:
263  raise ValueError(f"'filt' not set for functor {self.name}"
264  f"(dataset {self.dataset}) "
265  "and ParquetTable "
266  "contains multiple filters in column index. "
267  "Set 'filt' or set 'dataset' to 'ref'.")
268  else:
269  columnDict['band'] = self.filt
270 
271  if isinstance(data, MultilevelParquetTable):
272  return data._colsFromDict(columnDict)
273  elif isinstance(data, DeferredDatasetHandle):
274  if returnTuple:
275  return self._colsFromDict(columnDict, columnIndex=columnIndex)
276  else:
277  return columnDict
278 

◆ name()

def lsst.pipe.tasks.functors.Functor.name (   self)
inherited

◆ noDup()

def lsst.pipe.tasks.functors.Functor.noDup (   self)
inherited

Definition at line 148 of file functors.py.

148  def noDup(self):
149  if self._noDup is not None:
150  return self._noDup
151  else:
152  return self._defaultNoDup
153 

◆ renameCol()

def lsst.pipe.tasks.functors.CompositeFunctor.renameCol (   cls,
  col,
  renameRules 
)

Definition at line 521 of file functors.py.

521  def renameCol(cls, col, renameRules):
522  if renameRules is None:
523  return col
524  for old, new in renameRules:
525  if col.startswith(old):
526  col = col.replace(old, new)
527  return col
528 

◆ shortname()

def lsst.pipe.tasks.functors.Functor.shortname (   self)
inherited
Short name of functor (suitable for column name/dict key)

Reimplemented in lsst.pipe.tasks.functors.Color, and lsst.pipe.tasks.functors.MagDiff.

Definition at line 366 of file functors.py.

366  def shortname(self):
367  """Short name of functor (suitable for column name/dict key)
368  """
369  return self.name
370 
371 

◆ update()

def lsst.pipe.tasks.functors.CompositeFunctor.update (   self,
  new 
)

Definition at line 425 of file functors.py.

425  def update(self, new):
426  if isinstance(new, dict):
427  self.funcDict.update(new)
428  elif isinstance(new, CompositeFunctor):
429  self.funcDict.update(new.funcDict)
430  else:
431  raise TypeError('Can only update with dictionary or CompositeFunctor.')
432 
433  # Make sure new functors have the same 'filt' set
434  if self.filt is not None:
435  self.filt = self.filt
436 

Member Data Documentation

◆ dataset

lsst.pipe.tasks.functors.CompositeFunctor.dataset = None
static

Definition at line 401 of file functors.py.

◆ filt

lsst.pipe.tasks.functors.CompositeFunctor.filt

Definition at line 435 of file functors.py.

◆ funcDict

lsst.pipe.tasks.functors.CompositeFunctor.funcDict

Definition at line 406 of file functors.py.


The documentation for this class was generated from the following file: