LSSTApplications  20.0.0
LSSTDataManagementBasePackage
task.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2016 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 __all__ = ["Task", "TaskError"]
23 
24 import contextlib
25 
26 import lsstDebug
27 from lsst.pex.config import ConfigurableField
28 from lsst.log import Log
29 import lsst.daf.base as dafBase
30 from .timer import logInfo
31 
32 
class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp is found.
    - coadd finds no valid images in the specified patch.
    """
44 
45 
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and :ref:`creating-a-task` for more
    information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which is a task-specific subclass of
        `lsst.pex.config.Config`), or `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` defaults to ``_DefaultName`` and ``config`` defaults
          to ``self.ConfigClass()``.
        - If not `None` (a subtask) then you must specify name.
    log : `lsst.log.Log`, optional
        Log whose name is used as a log name prefix, or `None` for no prefix. Ignored if parentTask is
        specified, in which case ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's log is then a child logger of
        ``parentTask.log`` (if ``parentTask`` specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and ``_DefaultName`` does not exist.

    Notes
    -----
    Useful attributes include:

    - ``log``: an lsst.log.Log
    - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting task-specific metadata,
      e.g. data quality and performance metrics. This is data that is only meant to be
      persisted, never to be used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data, typically a single CCD.
      Iteration, if desired, is performed by a caller of the method. This is good design and allows
      multiprocessing without the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection of data references,
        if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data. Typically the run method returns all
        data, even if it is persisted, and the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask` subclasses should *not* accept a blob
    such as a butler data reference. How we will handle data references is still TBD, so don't make
    changes yet! RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass of `lsst.pex.config.Config`
    which configures the task. Subclasses should also have an attribute ``_DefaultName``:
    the default name if there is no parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task because it simplifies construction
    (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of `~lsst.pipe.base.CmdLineTask`
    not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        # Every task owns its metadata container; emptyMetadata() and getFullMetadata() rely on it.
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            # Subtask: name is mandatory; config and logger name are derived from the parent.
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The whole hierarchy shares one registry of tasks, keyed by full name.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getName() + '.' + name
        else:
            # Top-level task: fall back to the class-level default name and a default config.
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # The ``log`` argument only contributes a name prefix, and only for top-level tasks.
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            `lsst.afw.table` Catalog type) for this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default implementation returns
           an empty dict.

        This method may be called at any time after the Task is constructed, which means that all task
        schemas should be computed at construction time, *not* when data is actually processed. This
        reflects the philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return the same answer, regardless.

        The default implementation should always suffice. If your subtask uses schemas then override
        `Task.getSchemaCatalogs`, not this method.
        """
        # Copy first so that a dict owned (e.g. cached) by a subclass's getSchemaCatalogs
        # is never modified by the merge below.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name. Values are metadata
            for the top-level task and all subtasks, sub-subtasks, etc..

        Notes
        -----
        The returned metadata includes timing information (if ``@timer.timeMethod`` is used)
        and any metadata set by the task. The name of each item consists of the full task name
        with ``.`` replaced by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation that a task has a subtask
        and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each subtask separated by periods.
            For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the top-level task and all subtasks,
            sub-subtasks, etc..
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following arguments are automatically
            provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name``.

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of pex_config ConfigurableField
        or RegistryField.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        """Context manager to log performance data for an arbitrary block of code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name: ``Start`` and ``End``.
        logLevel
            A `lsst.log` level constant.

        Examples
        --------
        Creating a timer context::

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end marker, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another task.

        Here is an example of use::

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("a brief description of what this task does")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            The class and the ``(config, name, parentTask, log)`` constructor arguments; ``log`` is
            passed as `None`.
        """
        return self.__class__, (self.config, self._name, self._parentTask, None)
lsst.pipe.base.task.Task.getName
def getName(self)
Definition: task.py:250
lsst.pipe.base.task.Task._parentTask
_parentTask
Definition: task.py:122
lsst::daf::base::PropertyList
Class for storing ordered metadata with comments.
Definition: PropertyList.h:68
lsst.pipe.base.task.Task.getAllSchemaCatalogs
def getAllSchemaCatalogs(self)
Definition: task.py:188
lsst.pipe.base.task.Task._fullName
_fullName
Definition: task.py:128
lsst.pipe.base.task.Task.getTaskDict
def getTaskDict(self)
Definition: task.py:264
lsst.pipe.base.task.Task.makeSubtask
def makeSubtask(self, name, **keyArgs)
Definition: task.py:275
lsst.pipe.base.task.Task.__reduce__
def __reduce__(self)
Definition: task.py:373
lsstDebug.Info
Definition: lsstDebug.py:28
lsst.pipe.base.task.Task.config
config
Definition: task.py:149
lsst.pipe.base.task.TaskError
Definition: task.py:33
lsst.pipe.base.task.Task.log
log
Definition: task.py:148
lsst::log
Definition: Log.h:706
lsst.pipe.base.timer.logInfo
def logInfo(obj, prefix, logLevel=Log.DEBUG)
Definition: timer.py:62
lsst.pipe.base.task.Task.__init__
def __init__(self, config=None, name=None, parentTask=None, log=None)
Definition: task.py:120
lsst.pipe.base.task.Task.getFullName
def getFullName(self)
Definition: task.py:235
lsst.pipe.base.task.Task._display
_display
Definition: task.py:150
lsst.pipe.base.task.Task.getFullMetadata
def getFullMetadata(self)
Definition: task.py:210
lsst.pipe.base.task.Task.getSchemaCatalogs
def getSchemaCatalogs(self)
Definition: task.py:159
lsst::daf::base
Definition: Utils.h:47
items
std::vector< SchemaItem< Flag > > * items
Definition: BaseColumnView.cc:142
lsst.pipe.base.task.Task
Definition: task.py:46
lsst::daf::base::PropertySet
Class for storing generic metadata.
Definition: PropertySet.h:67
lsst.pipe.base.task.Task.emptyMetadata
def emptyMetadata(self)
Definition: task.py:153
lsst.pipe.base.task.Task._taskDict
_taskDict
Definition: task.py:131
lsst.pipe.base.task.Task.timer
def timer(self, name, logLevel=Log.DEBUG)
Definition: task.py:301
lsst.pipe.base.task.Task._name
_name
Definition: task.py:127
lsst.pipe.base.task.Task.makeField
def makeField(cls, doc)
Definition: task.py:329
lsst.pipe.base.task.Task.metadata
metadata
Definition: task.py:121