LSST Applications  22.0.1,22.0.1+01bcf6a671,22.0.1+046ee49490,22.0.1+05c7de27da,22.0.1+0c6914dbf6,22.0.1+1220d50b50,22.0.1+12fd109e95,22.0.1+1a1dd69893,22.0.1+1c910dc348,22.0.1+1ef34551f5,22.0.1+30170c3d08,22.0.1+39153823fd,22.0.1+611137eacc,22.0.1+771eb1e3e8,22.0.1+94e66cc9ed,22.0.1+9a075d06e2,22.0.1+a5ff6e246e,22.0.1+a7db719c1a,22.0.1+ba0d97e778,22.0.1+bfe1ee9056,22.0.1+c4e1e0358a,22.0.1+cc34b8281e,22.0.1+d640e2c0fa,22.0.1+d72a2e677a,22.0.1+d9a6b571bd,22.0.1+e485e9761b,22.0.1+ebe8d3385e
LSST Data Management Base Package
task.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2016 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 __all__ = ["Task", "TaskError"]
23 
24 import contextlib
25 
26 import lsstDebug
27 from lsst.pex.config import ConfigurableField
28 from lsst.log import Log
29 import lsst.daf.base as dafBase
30 from .timer import logInfo
31 
32 
class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """
45 
46 
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of ``Task.ConfigClass``,
        which is a task-specific subclass of `lsst.pex.config.Config`), or
        `None`. If `None`:

        - If ``parentTask`` is specified then defaults to
          ``parentTask.config.<name>``.
        - If ``parentTask`` is `None` then defaults to ``self.ConfigClass()``.

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        ``Task._DefaultName`` (top-level tasks only).
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` and ``name`` fall back
          to ``self.ConfigClass()`` and ``self._DefaultName`` when omitted.
        - If not `None` (a subtask) then you must specify ``name``.
    log : `lsst.log.Log`, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if ``parentTask`` is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: an lsst.log.Log
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
      task-specific metadata, e.g. data quality and performance metrics.
      This is data that is only meant to be persisted, never to be used by
      the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted,
        and the task's config offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask`\ s should
    *not* accept a blob such as a butler data reference. How we will handle
    data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        # Every task owns a metadata container from construction onwards;
        # emptyMetadata() and getFullMetadata() rely on it being present.
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The task dictionary is shared (not copied) across the whole
            # hierarchy, so registering below makes this subtask visible to
            # every other task in the tree.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getName() + '.' + name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # The ``log`` argument only contributes a name prefix, and only
            # when that name is non-empty.
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        The task dictionary is shared by every task in the hierarchy, so each
        registered task receives a fresh, empty
        `lsst.daf.base.PropertyList`.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Copy so that merging never mutates a dict owned (e.g. cached) by a
        # subclass implementation of getSchemaCatalogs.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the top-level
            task and all subtasks, sub-subtasks, etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or the
            field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the item name
            prefixes ``<name>Start`` and ``<name>End``.
        logLevel
            A `lsst.log` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end marker, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked to rebuild the object.
        args : sequence
            Positional arguments forwarded to ``factory``.
        kwargs : `dict`
            Keyword arguments forwarded to ``factory``.

        Returns
        -------
        obj
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``(callable, args)`` pair: on unpickling, `_unpickle_via_factory`
            is called with this task's class, no positional arguments, and
            the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())
std::vector< SchemaItem< Flag > > * items
Class for storing ordered metadata with comments.
Definition: PropertyList.h:68
Class for storing generic metadata.
Definition: PropertySet.h:67
def _reduce_kwargs(self)
Definition: task.py:418
def getTaskDict(self)
Definition: task.py:288
def makeField(cls, doc)
Definition: task.py:359
def emptyMetadata(self)
Definition: task.py:166
def _unpickle_via_factory(factory, args, kwargs)
Definition: task.py:410
def getSchemaCatalogs(self)
Definition: task.py:172
def getFullName(self)
Definition: task.py:256
def makeSubtask(self, name, **keyArgs)
Definition: task.py:299
def __init__(self, config=None, name=None, parentTask=None, log=None)
Definition: task.py:133
def __reduce__(self)
Definition: task.py:432
def getName(self)
Definition: task.py:274
def getAllSchemaCatalogs(self)
Definition: task.py:204
def timer(self, name, logLevel=Log.DEBUG)
Definition: task.py:327
def getFullMetadata(self)
Definition: task.py:229
Definition: Log.h:706
def logInfo(obj, prefix, logLevel=Log.DEBUG)
Definition: timer.py:63