LSSTApplications  11.0-13-gbb96280,12.1+18,12.1+7,12.1-1-g14f38d3+72,12.1-1-g16c0db7+5,12.1-1-g5961e7a+84,12.1-1-ge22e12b+23,12.1-11-g06625e2+4,12.1-11-g0d7f63b+4,12.1-19-gd507bfc,12.1-2-g7dda0ab+38,12.1-2-gc0bc6ab+81,12.1-21-g6ffe579+2,12.1-21-gbdb6c2a+4,12.1-24-g941c398+5,12.1-3-g57f6835+7,12.1-3-gf0736f3,12.1-37-g3ddd237,12.1-4-gf46015e+5,12.1-5-g06c326c+20,12.1-5-g648ee80+3,12.1-5-gc2189d7+4,12.1-6-ga608fc0+1,12.1-7-g3349e2a+5,12.1-7-gfd75620+9,12.1-9-g577b946+5,12.1-9-gc4df26a+10
LSSTDataManagementBasePackage
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]

# Environment variables that supply the default root paths for the input,
# calib and output repositories; see _fixPath for how they are applied.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """!Apply environment variable as default root, if present, and abspath
57 
58  @param[in] defName name of environment variable containing default root path;
59  if the environment variable does not exist then the path is relative
60  to the current working directory
61  @param[in] path path relative to default root path
62  @return abspath: path that has been expanded, or None if the environment variable does not exist
63  and path is None
64  """
65  defRoot = os.environ.get(defName)
66  if defRoot is None:
67  if path is None:
68  return None
69  return os.path.abspath(path)
70  return os.path.abspath(os.path.join(defRoot, path or ""))
71 
72 
class DataIdContainer(object):
    """!A container for data IDs and associated data references

    Override for data IDs that require special handling to be converted to data references,
    and specify the override class as ContainerClass for add_id_argument.
    (If you don't want the argument parser to compute data references, you may use this class
    and specify doMakeDataRefList=False in add_id_argument.)
    """

    def __init__(self, level=None):
        """!Construct a DataIdContainer"""
        self.datasetType = None  # actual dataset type, as specified on the command line (if dynamic)
        self.level = level
        self.idList = []
        self.refList = []

    def setDatasetType(self, datasetType):
        """!Set actual dataset type, once it is known"""
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """!Validate data IDs and cast them to the correct type (modify idList in place).

        @param[in] butler  data butler (a \ref lsst.daf.persistence.butler.Butler
            "lsst.daf.persistence.Butler")
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataId in self.idList:
            for key, rawValue in dataId.items():
                if key not in keyTypes:
                    validKeys = sorted(keyTypes.keys())
                    raise KeyError("Unrecognized ID key %r; valid keys are: %s" % (key, validKeys))
                keyType = keyTypes[key]
                if keyType == str:
                    continue  # command-line values are already strings
                try:
                    dataId[key] = keyType(rawValue)
                except Exception:
                    raise TypeError("Cannot cast value %r to %s for ID key %r" % (rawValue, keyType, key,))

    def makeDataRefList(self, namespace):
        """!Compute refList based on idList

        Not called if add_id_argument called with doMakeDataRefList=False

        @param[in] namespace  results of parsing command-line (with 'butler' and 'log' elements)
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            # exclude nonexistent data; this is a recursive test,
            # e.g. for the sake of "raw" data
            matches = [
                dataRef for dataRef in butler.subset(datasetType=self.datasetType,
                                                     level=self.level, dataId=dataId)
                if dataExists(butler=butler, datasetType=self.datasetType, dataRef=dataRef)
            ]
            if not matches:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += matches
140 
141 
class DataIdArgument(object):
    """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        """!Constructor

        @param[in] name  name of identifier (argument name without dashes)
        @param[in] datasetType  type of dataset; specify a string for a fixed dataset type
            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a command-line argument)
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  class to contain data IDs and data references;
            the default class will work for many kinds of data, but you may have to override
            to compute some kinds of data references.
        """
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    # NOTE(review): the extracted listing lost the "def" line for this property
    # (a bare @property was followed directly by a docstring, which is a syntax
    # error); restored from the usage at add_id_argument and getDatasetType.
    @property
    def isDynamicDatasetType(self):
        """!Is the dataset type dynamic (specified on the command line)?"""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string

        @param[in] namespace  parsed command
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
180 
181 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """!Abstract base class for a dataset type determined from parsed command-line arguments

    Subclasses must implement getDatasetType and may override addArgument
    to register an extra command-line argument of their own.
    """

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name, if wanted

        @param[in] parser  argument parser to which to add argument
        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"

        The default implementation does nothing
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed command-line arguments

        @param[in] namespace  parsed command
        """
        raise NotImplementedError("Subclasses must override")
203 
204 
206  """!A dataset type specified by a command-line argument.
207  """
208 
209  def __init__(self,
210  name=None,
211  help="dataset type to process from input data repository",
212  default=None,
213  ):
214  """!Construct a DatasetArgument
215 
216  @param[in] name name of command-line argument (including leading "--", if appropriate)
217  whose value is the dataset type; if None, uses --idName_dstype
218  where idName is the name of the data ID argument (e.g. "id")
219  @param[in] help help string for the command-line argument
220  @param[in] default default value; if None, then the command-line option is required;
221  ignored if the argument is positional (name does not start with "-")
222  because positional argument do not support default values
223  """
224  DynamicDatasetType.__init__(self)
225  self.name = name
226  self.help = help
227  self.default = default
228 
229  def getDatasetType(self, namespace):
230  """Return the dataset type as a string, from the appropriate command-line argument
231 
232  @param[in] namespace parsed command
233  """
234  argName = self.name.lstrip("-")
235  return getattr(namespace, argName)
236 
237  def addArgument(self, parser, idName):
238  """!Add a command-line argument to specify dataset type name
239 
240  Also set self.name if it is None
241  """
242  help = self.help if self.help else "dataset type for %s" % (idName,)
243  if self.name is None:
244  self.name = "--%s_dstype" % (idName,)
245  requiredDict = dict()
246  if self.name.startswith("-"):
247  requiredDict = dict(required=self.default is None)
248  parser.add_argument(
249  self.name,
250  default=self.default,
251  help=help,
252  **requiredDict) # cannot specify required=None for positional arguments
253 
254 
256  """!A dataset type specified by a config parameter
257  """
258 
259  def __init__(self, name):
260  """!Construct a ConfigDatasetType
261 
262  @param[in] name name of config option whose value is the dataset type
263  """
264  DynamicDatasetType.__init__(self)
265  self.name = name
266 
267  def getDatasetType(self, namespace):
268  """Return the dataset type as a string, from the appropriate config field
269 
270  @param[in] namespace parsed command
271  """
272  # getattr does not work reliably if the config field name is dotted,
273  # so step through one level at a time
274  keyList = self.name.split(".")
275  value = namespace.config
276  for key in keyList:
277  try:
278  value = getattr(value, key)
279  except KeyError:
280  raise RuntimeError("Cannot find config parameter %r" % (self.name,))
281  return value
282 
283 
class ArgumentParser(argparse.ArgumentParser):
    """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser

    Users may wish to add additional arguments before calling parse_args.

    @note
    - I would prefer to check data ID keys and values as they are parsed,
      but the required information comes from the butler, so I have to construct a butler
      before I do this checking. Constructing a butler is slow, so I only want do it once,
      after parsing the command line, so as to catch syntax errors quickly.
    """
    requireOutput = True  # Require an output directory to be specified?

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """!Construct an ArgumentParser

        @param[in] name  name of top-level task; used to identify camera-specific override files
        @param[in] usage  usage string
        @param[in] **kwargs  additional keyword arguments for argparse.ArgumentParser
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # Positional "input" argument; stored raw and resolved in _parseDirectories
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                          "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                          "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Establish a default log configuration so log output appears even before
        # parse_args is called; --longlog replaces this with a more verbose layout.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """!Add a data ID argument

        Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
        then add a second argument to specify the dataset type.

        @param[in] name  data ID argument (including leading dashes, if wanted)
        @param[in] datasetType  type of dataset; supply a string for a fixed dataset type,
            or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
        @param[in] help  help string for the argument
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  data ID container class to use to contain results;
            override the default if you need a special means of computing data references from data IDs

        The associated data is put into namespace.<dataIdArgument.name> as an instance of ContainerClass;
        the container includes fields:
        - idList: a list of data ID dicts
        - refList: a list of butler data references (empty if doMakeDataRefList false)
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """!Parse arguments for a pipeline task

        @param[in,out] config  config for the task being run
        @param[in] args  argument list; if None use sys.argv[1:]
        @param[in] log  log (instance lsst.log Log); if None use the default log
        @param[in] override  a config override function; it must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files are applied, and before
            command-line config overrides are applied (thus allowing the user the final word).

        @return namespace: an argparse.Namespace containing many useful fields including:
        - camera: camera name
        - config: the supplied config with all overrides applied, validated and frozen
        - butler: a butler for the data
        - an entry for each of the data ID arguments registered by add_id_argument(),
          the value of which is a DataIdArgument that includes public elements 'idList' and 'refList'
        - log: a lsst.log Log
        - an entry for each command-line argument, with the following exceptions:
          - config is the supplied config, suitably updated
          - configfile, id and loglevel are all missing
        - obsPkg: name of obs_ package for this camera
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must be the first positional argument; bail out
        # early with help text if it is missing.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        # Apply obs-package and camera override files before the user-supplied
        # override function, so the function (and the command line) win.
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        namespace.butler = dafPersist.Butler(
            root=namespace.input,
            calibRoot=namespace.calib,
            outputRoot=namespace.output,
        )

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # A _parent link means the rerun output chains back to another repo;
                # use that repo as the input.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw* attributes have served their purpose; remove them
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """!Process the parsed data for each data ID argument

        Processing includes:
        - Validate data ID keys
        - Cast the data ID values to the correct type
        - Compute data references from data IDs

        @param[in,out] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - butler
            - log
            - config, if any dynamic dataset types are set by a config parameter
            - dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
            and modifies these attributes:
            - <name> for each data ID argument registered using add_id_argument
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """!Apply obs-package-specific and camera-specific config override files, if found

        @param[in] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - obsPkg

        Look in the package namespace.obsPkg for files:
        - config/<task_name>.py
        - config/<camera_name>/<task_name>.py
        and load if found
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """!Perform camera-specific operations before parsing the command line.

        The default implementation does nothing.

        @param[in,out] namespace  namespace (an argparse.Namespace) with the following fields:
            - camera: the camera name
            - config: the config passed to parse_args, with no overrides applied
            - obsPkg: the obs_ package for this camera
            - log: a lsst.log Log
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """!Allow files of arguments referenced by `@<path>` to contain multiple values on each line

        @param[in] arg_line  line of text read from an argument file
        """
        arg_line = arg_line.strip()
        # skip blank lines and full-line comments
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg
663 
664 
# NOTE(review): the extracted listing lost the class header line (original
# line 665); restored as InputOnlyArgumentParser, the name declared in __all__.
class InputOnlyArgumentParser(ArgumentParser):
    """An ArgumentParser for pipeline tasks that don't write any output"""
    requireOutput = False  # We're not going to write anything
668 
669 
def getTaskDict(config, taskDict=None, baseName=""):
    """!Get a dictionary of task info for all subtasks in a config

    Designed to be called recursively; the user should call with only a config
    (leaving taskDict and baseName at their default values).

    @param[in] config  configuration to process, an instance of lsst.pex.config.Config
    @param[in,out] taskDict  users should not specify this argument;
        (supports recursion; if provided, taskDict is updated in place, else a new dict is started)
    @param[in] baseName  users should not specify this argument.
        (supports recursion: if a non-empty string then a period is appended and the result is used
        as a prefix for additional entries in taskDict; otherwise no prefix is used)
    @return taskDict: a dict of config field name: task name
    """
    if taskDict is None:
        taskDict = {}
    for fieldName, field in config.items():
        # Only ConfigurableField-like entries (with both "value" and "target")
        # describe subtasks; skip everything else.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
698 
699 
def obeyShowArgument(showOpts, config=None, exit=False):
    """!Process arguments specified with --show (but ignores "data")

    @param showOpts  List of options passed to --show
    @param config  The provided config
    @param exit  Exit if "run" isn't included in showOpts

    Supports the following options in showOpts:
    - config[=PAT]  Dump all the config entries, or just the ones that match the glob pattern
    - history=PAT  Show where the config entries that match the glob pattern were set
    - tasks  Show task hierarchy
    - data  Ignored; to be processed by caller
    - run  Keep going (the default behaviour is to exit if --show is specified)

    Calls sys.exit(1) if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # split "command=args"; a bare command gets empty args
        if "=" in what:
            showCommand, showArgs = what.split("=", 1)
        else:
            showCommand, showArgs = what, ""

        if showCommand == "config":
            # an optional leading "config." prefix is accepted and stripped
            pattern = re.search(r"^(?:config.)?(.+)?", showArgs).group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            self._pattern = re.compile(fnmatch.translate(mat.group(1)))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            pattern = re.search(r"^(?:config.)?(.+)?", showArgs).group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            parts = pattern.split(".")
            cpath, cname = parts[:-1], parts[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand in ("data", "run"):
            pass  # handled by the caller ("data") or a no-op flag ("run")
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
795 
796 
def showTaskHierarchy(config):
    """!Print the task hierarchy contained in a config to stdout

    @param[in] config: configuration to process (an lsst.pex.config.Config)
    """
    print(u"Subtasks:")
    # getTaskDict maps dotted field names to task names; print them sorted by field name
    taskDict = getTaskDict(config=config)
    for fieldName, taskName in sorted(taskDict.items()):
        print(u"%s: %s" % (fieldName, taskName))
809 
810 
class ConfigValueAction(argparse.Action):
    """!argparse action callback to override config parameters using name=value pairs from the command line
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Override one or more config name value pairs

        @param[in] parser        argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values        a list of configItemName=value pairs
        @param[in] option_string option value specified by the user (a str)
        """
        # nothing to override until a config object exists on the namespace
        if namespace.config is None:
            return
        for pair in values:
            fieldName, sep, strValue = pair.partition("=")
            if not strValue:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # First try assigning the raw string; if the field rejects the
            # string form, fall back to eval-ing it as a Python literal.
            try:
                setDottedAttr(namespace.config, fieldName, strValue)
                continue
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
                continue
            except Exception:
                pass

            try:
                parsedValue = eval(strValue, {})
            except Exception:
                parser.error("cannot parse %r as a value for %s" % (strValue, fieldName))
            try:
                setDottedAttr(namespace.config, fieldName, parsedValue)
            except Exception as e:
                parser.error("cannot set config.%s=%r: %s" % (fieldName, parsedValue, e))
846 
847 
class ConfigFileAction(argparse.Action):
    """!argparse action to load config overrides from one or more files
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """!Load one or more files of config overrides

        @param[in] parser        argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values        a list of data config file paths
        @param[in] option_string option value specified by the user (a str)
        """
        # no config to load overrides into; silently skip
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                # report the failing file and the underlying error
                parser.error("cannot load config file %r: %s" % (path, e))
869 
870 
class IdValueAction(argparse.Action):
    """!argparse action callback to process a data ID into a dict
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Parse --id data and append results to namespace.<argument>.idList

        @param[in] parser        argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - <idName>.idList, where <idName> is the name of the ID argument,
                for instance "id" for ID argument --id
        @param[in] values        a list of data IDs; see data format below
        @param[in] option_string option value specified by the user (a str)

        The data format is:
        key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. value1_1) may either be a string, or of the form "int..int" (e.g. "1..3")
        which is interpreted as "1^2^3" (inclusive, unlike a python range). So "0^2..4^7..9" is
        equivalent to "0^2^3^4^7^8^9". You may also specify a stride: "1..5:2" is "1^3^5"

        The cross product is computed for keys with multiple values. For example:
            --id visit=1^2 ccd=1,1^2,2
        results in the following data ID dicts being appended to namespace.<argument>.idList
        (the last key varies fastest, as with itertools.product):
            {"visit":1, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"1,1"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for v in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
                if mat:
                    # expand "start..stop[:stride]" to the inclusive list of values
                    v1 = int(mat.group(1))
                    v2 = int(mat.group(2))
                    v3 = mat.group(3)
                    v3 = int(v3) if v3 else 1
                    # use a distinct loop variable; previously this shadowed "v"
                    for num in range(v1, v2 + 1, v3):
                        idDict[name].append(str(num))
                else:
                    idDict[name].append(v)

        # cross product of all per-key value lists, preserving key order
        iterList = [idDict[key] for key in idDict.keys()]
        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*iterList)]

        # append to the DataIdContainer for this particular ID argument (e.g. namespace.id)
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
928 
929 
class LogLevelAction(argparse.Action):
    """!argparse action to set log level
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Set trace level

        @param[in] parser        argument parser (instance of ArgumentParser)
        @param[in] namespace     parsed command (an instance of argparse.Namespace); ignored
        @param[in] values        a list of trace levels;
            each item must be of the form 'component_name=level' or 'level',
            where level is one of TRACE, DEBUG, INFO, WARN, ERROR or FATAL (not case sensitive)
        @param[in] option_string option value specified by the user (a str)
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        validLevelNames = frozenset(permittedLevelList)
        for levelSpec in values:
            component, sep, levelStr = levelSpec.partition("=")
            if not levelStr:
                # a bare level with no "component=" prefix applies to the top-level log
                levelStr, component = component, None
            upperLevel = levelStr.upper()
            if upperLevel in validLevelNames:
                logLevel = getattr(lsstLog.Log, upperLevel)
            else:
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
959 
960 
def setDottedAttr(item, name, value):
    """!Like setattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in,out] item  object whose attribute is to be set
    @param[in] name      name of item to set
    @param[in] value     new value for the item

    For example if name is foo.bar.baz then item.foo.bar.baz is set to the specified value.
    """
    head, sep, rest = name.partition(".")
    if sep:
        # descend one level and recurse on the remainder of the dotted path
        setDottedAttr(getattr(item, head), rest, value)
    else:
        setattr(item, head, value)
975 
976 
def getDottedAttr(item, name):
    """!Like getattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in] item  object whose attribute is to be returned
    @param[in] name  name of item to get

    For example if name is foo.bar.baz then returns item.foo.bar.baz
    """
    head, sep, rest = name.partition(".")
    attr = getattr(item, head)
    # recurse through any remaining dotted components
    return getDottedAttr(attr, rest) if sep else attr
989 
990 
def dataExists(butler, datasetType, dataRef):
    """!Return True if data exists at the current level or any data exists at a deeper level, False otherwise

    @param[in] butler       data butler (a \ref lsst.daf.persistence.butler.Butler
        "lsst.daf.persistence.Butler")
    @param[in] datasetType  dataset type (a str)
    @param[in] dataRef      butler data reference (a \ref lsst.daf.persistence.butlerSubset.ButlerDataRef
        "lsst.daf.persistence.ButlerDataRef")
    """
    children = dataRef.subItems()
    if not children:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # non-leaf: data exists iff any descendant reference has data
    return any(dataExists(butler, datasetType, child) for child in children)
def setDatasetType
Set actual dataset type, once it is known.
def castDataIds
Validate data IDs and cast them to the correct type (modify idList in place).
argparse action to set log level
An argument parser for pipeline tasks that is based on argparse.ArgumentParser.
def dataExists
Return True if data exists at the current level or any data exists at a deeper level, False otherwise.
A dataset type specified by a command-line argument.
def showTaskHierarchy
Print task hierarchy to stdout.
Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument.
def setDottedAttr
Like setattr, but accepts hierarchical names, e.g.
def __call__
Parse --id data and append results to namespace.
argparse action callback to override config parameters using name=value pairs from the command line ...
def getTaskDict
Get a dictionary of task info for all subtasks in a config.
std::string getPackageDir(std::string const &packageName)
return the root directory of a setup package
Definition: Utils.cc:34
def _applyInitialOverrides
Apply obs-package-specific and camera-specific config override files, if found.
def makeDataRefList
Compute refList based on idList.
def __init__
Construct a DatasetArgument.
def convert_arg_line_to_args
Allow files of arguments referenced by @<path> to contain multiple values on each line...
def getDottedAttr
Like getattr, but accepts hierarchical names, e.g.
def addArgument
Add a command-line argument to specify dataset type name.
def __call__
Load one or more files of config overrides.
def isDynamicDatasetType
Is the dataset type dynamic (specified on the command line)?
def __call__
Override one or more config name value pairs.
argparse action callback to process a data ID into a dict
def __init__
Construct a ConfigDatasetType.
def setLevel
Definition: log.py:71
def handleCamera
Perform camera-specific operations before parsing the command line.
def parse_args
Parse arguments for a pipeline task.
def _processDataIds
Process the parsed data for each data ID argument.
A dataset type specified by a config parameter.
def __init__
Construct an ArgumentParser.
def obeyShowArgument
Process arguments specified with --show (but ignores "data")
Abstract base class for a dataset type determined from parsed command-line arguments.
def _fixPath
Apply environment variable as default root, if present, and abspath.
def addArgument
Add a command-line argument to specify dataset type name, if wanted.
argparse action to load config overrides from one or more files
def getDatasetType
Return the dataset type as a string.
def __init__
Construct a DataIdContainer.
A container for data IDs and associated data references.