LSSTApplications  11.0-13-gbb96280,12.1.rc1,12.1.rc1+1,12.1.rc1+2,12.1.rc1+5,12.1.rc1+8,12.1.rc1-1-g06d7636+1,12.1.rc1-1-g253890b+5,12.1.rc1-1-g3d31b68+7,12.1.rc1-1-g3db6b75+1,12.1.rc1-1-g5c1385a+3,12.1.rc1-1-g83b2247,12.1.rc1-1-g90cb4cf+6,12.1.rc1-1-g91da24b+3,12.1.rc1-2-g3521f8a,12.1.rc1-2-g39433dd+4,12.1.rc1-2-g486411b+2,12.1.rc1-2-g4c2be76,12.1.rc1-2-gc9c0491,12.1.rc1-2-gda2cd4f+6,12.1.rc1-3-g3391c73+2,12.1.rc1-3-g8c1bd6c+1,12.1.rc1-3-gcf4b6cb+2,12.1.rc1-4-g057223e+1,12.1.rc1-4-g19ed13b+2,12.1.rc1-4-g30492a7
LSSTDataManagementBasePackage
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 
41 import lsst.utils
42 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
50 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
51 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
52 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """!Apply environment variable as default root, if present, and abspath
57 
58  @param[in] defName name of environment variable containing default root path;
59  if the environment variable does not exist then the path is relative
60  to the current working directory
61  @param[in] path path relative to default root path
62  @return abspath: path that has been expanded, or None if the environment variable does not exist
63  and path is None
64  """
65  defRoot = os.environ.get(defName)
66  if defRoot is None:
67  if path is None:
68  return None
69  return os.path.abspath(path)
70  return os.path.abspath(os.path.join(defRoot, path or ""))
71 
72 
class DataIdContainer(object):
    """!A container for data IDs and associated data references

    Override for data IDs that require special handling to be converted to data references,
    and specify the override class as ContainerClass for add_id_argument.
    (If you don't want the argument parser to compute data references, you may use this class
    and specify doMakeDataRefList=False in add_id_argument.)
    """

    def __init__(self, level=None):
        """!Construct a DataIdContainer"""
        # actual dataset type, as specified on the command line (if dynamic)
        self.datasetType = None
        self.level = level
        self.idList = []
        self.refList = []

    def setDatasetType(self, datasetType):
        """!Set actual dataset type, once it is known"""
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """!Validate data IDs and cast them to the correct type (modify idList in place).

        @param[in] butler data butler (a \\ref lsst.daf.persistence.butler.Butler
            "lsst.daf.persistence.Butler")
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataId in self.idList:
            for key, strVal in dataId.items():
                if key not in keyTypes:
                    validKeys = sorted(keyTypes.keys())
                    raise KeyError("Unrecognized ID key %r; valid keys are: %s" % (key, validKeys))
                keyType = keyTypes[key]
                # values arrive as strings from the command line; only non-str keys need casting
                if keyType == str:
                    continue
                try:
                    dataId[key] = keyType(strVal)
                except Exception:
                    raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))

    def makeDataRefList(self, namespace):
        """!Compute refList based on idList

        Not called if add_id_argument called with doMakeDataRefList=False

        @param[in] namespace results of parsing command-line (with 'butler' and 'log' elements)
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            subset = butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId)
            # exclude nonexistent data; this is a recursive test, e.g. for the sake of "raw" data
            existing = [ref for ref in subset
                        if dataExists(butler=butler, datasetType=self.datasetType, dataRef=ref)]
            if not existing:
                namespace.log.warn("No data found for dataId=%s" % (dataId,))
                continue
            self.refList += existing
140 
141 
class DataIdArgument(object):
    """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        """!Constructor

        @param[in] name name of identifier (argument name without dashes)
        @param[in] datasetType type of dataset; specify a string for a fixed dataset type
            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a command-line argument)
        @param[in] level level of dataset, for butler
        @param[in] doMakeDataRefList construct data references?
        @param[in] ContainerClass class to contain data IDs and data references;
            the default class will work for many kinds of data, but you may have to override
            to compute some kinds of data references.
        """
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        """!Is the dataset type dynamic (specified on the command line)?"""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string

        @param[in] namespace parsed command
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
180 
181 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """!Abstract base class for a dataset type determined from parsed command-line arguments
    """

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name, if wanted

        @param[in] parser argument parser to which to add argument
        @param[in] idName name of data ID argument, without the leading "--", e.g. "id"

        This default implementation is a no-op; subclasses that need an extra
        command-line argument override it.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed command-line arguments

        @param[in] namespace parsed command
        """
        raise NotImplementedError("Subclasses must override")
203 
204 
class DatasetArgument(DynamicDatasetType):
    """!A dataset type specified by a command-line argument.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        """!Construct a DatasetArgument

        @param[in] name name of command-line argument (including leading "--", if appropriate)
            whose value is the dataset type; if None, uses --idName_dstype
            where idName is the name of the data ID argument (e.g. "id")
        @param[in] help help string for the command-line argument
        @param[in] default default value; if None, then the command-line option is required;
            ignored if the argument is positional (name does not start with "-")
            because positional arguments do not support default values
        """
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate command-line argument

        @param[in] namespace parsed command
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name

        Also set self.name if it is None
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            # optional argument: required only when no default was supplied
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
253 
254 
class ConfigDatasetType(DynamicDatasetType):
    """!A dataset type specified by a config parameter
    """

    def __init__(self, name):
        """!Construct a ConfigDatasetType

        @param[in] name name of config option whose value is the dataset type
        """
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field

        @param[in] namespace parsed command
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except KeyError:
                # NOTE(review): pex_config field access apparently raises KeyError
                # for a missing field name — confirmed by the original handler here
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
282 
283 
class ArgumentParser(argparse.ArgumentParser):
    """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser

    Users may wish to add additional arguments before calling parse_args.

    @note
    - I would prefer to check data ID keys and values as they are parsed,
      but the required information comes from the butler, so I have to construct a butler
      before I do this checking. Constructing a butler is slow, so I only want do it once,
      after parsing the command line, so as to catch syntax errors quickly.
    """
    requireOutput = True  # Require an output directory to be specified?

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """!Construct an ArgumentParser

        @param[in] name name of top-level task; used to identify camera-specific override files
        @param[in] usage usage string
        @param[in] **kwargs additional keyword arguments for argparse.ArgumentParser
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # default lsst.log configuration: INFO and above to stderr, terse layout
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """!Add a data ID argument

        Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
        then add a second argument to specify the dataset type.

        @param[in] name data ID argument (including leading dashes, if wanted)
        @param[in] datasetType type of dataset; supply a string for a fixed dataset type,
            or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
        @param[in] help help string for the argument
        @param[in] level level of dataset, for butler
        @param[in] doMakeDataRefList construct data references?
        @param[in] ContainerClass data ID container class to use to contain results;
            override the default if you need a special means of computing data references from data IDs

        The associated data is put into namespace.<dataIdArgument.name> as an instance of ContainerClass;
        the container includes fields:
        - idList: a list of data ID dicts
        - refList: a list of butler data references (empty if doMakeDataRefList false)
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """!Parse arguments for a pipeline task

        @param[in,out] config config for the task being run
        @param[in] args argument list; if None use sys.argv[1:]
        @param[in] log log (instance lsst.log Log); if None use the default log
        @param[in] override a config override function; it must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files are applied, and before
            command-line config overrides are applied (thus allowing the user the final word).

        @return namespace: an argparse.Namespace containing many useful fields including:
        - camera: camera name
        - config: the supplied config with all overrides applied, validated and frozen
        - butler: a butler for the data
        - an entry for each of the data ID arguments registered by add_id_argument(),
          the value of which is a DataIdArgument that includes public elements 'idList' and 'refList'
        - log: a lsst.log Log
        - an entry for each command-line argument, with the following exceptions:
          - config is the supplied config, suitably updated
          - configfile, id and loglevel are all missing
        - obsPkg: name of obs_ package for this camera
        """
        if args is None:
            args = sys.argv[1:]

        # the first positional argument must be the input repository;
        # treat a missing/option-like first argument as a usage error (or plain -h/--help)
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output" % namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.info("input=%s" % (namespace.input,))
        namespace.log.info("calib=%s" % (namespace.calib,))
        namespace.log.info("output=%s" % (namespace.output,))

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        namespace.butler = dafPersist.Butler(
            root=namespace.input,
            calibRoot=namespace.calib,
            outputRoot=namespace.output,
        )

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                if os.path.exists(namespace.output):
                    # an existing output rerun implies its _parent is the effective input
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """!Process the parsed data for each data ID argument

        Processing includes:
        - Validate data ID keys
        - Cast the data ID values to the correct type
        - Compute data references from data IDs

        @param[in,out] namespace parsed namespace (an argparse.Namespace);
            reads these attributes:
            - butler
            - log
            - config, if any dynamic dataset types are set by a config parameter
            - dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
            and modifies these attributes:
            - <name> for each data ID argument registered using add_id_argument
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """!Apply obs-package-specific and camera-specific config override files, if found

        @param[in] namespace parsed namespace (an argparse.Namespace);
            reads these attributes:
            - obsPkg

        Look in the package namespace.obsPkg for files:
        - config/<task_name>.py
        - config/<camera_name>/<task_name>.py
        and load if found
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config override file %r" % (filePath,))
                namespace.config.load(filePath)
            else:
                namespace.log.info("Config override file does not exist: %r" % (filePath,))

    def handleCamera(self, namespace):
        """!Perform camera-specific operations before parsing the command line.

        The default implementation does nothing.

        @param[in,out] namespace namespace (an argparse.Namespace) with the following fields:
            - camera: the camera name
            - config: the config passed to parse_args, with no overrides applied
            - obsPkg: the obs_ package for this camera
            - log: a lsst.log Log
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """!Allow files of arguments referenced by `@<path>` to contain multiple values on each line

        @param[in] arg_line line of text read from an argument file
        """
        arg_line = arg_line.strip()
        # skip blank lines and whole-line comments
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg
663 
664 
class InputOnlyArgumentParser(ArgumentParser):
    """An ArgumentParser for pipeline tasks that don't write any output"""
    requireOutput = False  # We're not going to write anything
668 
669 
def getTaskDict(config, taskDict=None, baseName=""):
    """!Get a dictionary of task info for all subtasks in a config

    Designed to be called recursively; the user should call with only a config
    (leaving taskDict and baseName at their default values).

    @param[in] config configuration to process, an instance of lsst.pex.config.Config
    @param[in,out] taskDict users should not specify this argument;
        (supports recursion; if provided, taskDict is updated in place, else a new dict is started)
    @param[in] baseName users should not specify this argument.
        (supports recursion: if a non-empty string then a period is appended and the result is used
        as a prefix for additional entries in taskDict; otherwise no prefix is used)
    @return taskDict: a dict of config field name: task name
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # only ConfigurableField-like entries carry both .value and .target
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if isinstance(subConfig, pexConfig.Config):
            subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
            try:
                taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
            except Exception:
                taskName = repr(field.target)
            taskDict[subBaseName] = taskName
            # recurse into the subtask's own config
            getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
698 
699 
def obeyShowArgument(showOpts, config=None, exit=False):
    """!Process arguments specified with --show (but ignores "data")

    @param showOpts List of options passed to --show
    @param config The provided config
    @param exit Exit if "run" isn't included in showOpts

    Supports the following options in showOpts:
    - config[=PAT] Dump all the config entries, or just the ones that match the glob pattern
    - tasks Show task hierarchy
    - data Ignored; to be processed by caller
    - run Keep going (the default behaviour is to exit if --show is specified)

    Calls sys.exit(1) if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # "config" may carry an optional glob pattern after "=", e.g. --show config=*calib*
        mat = re.search(r"^config(?:=(.+))?", what)
        if mat:
            pattern = mat.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            # strip the :NOIGNORECASE suffix and match case-sensitively
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        # called by config.saveToStream; filter each emitted entry
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                # no pattern: dump the whole config unfiltered
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif what == "data":
            # handled by the caller (needs the butler/refLists)
            pass
        elif what == "run":
            # "run" merely suppresses the exit below
            pass
        elif what == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
767 
768 
def showTaskHierarchy(config):
    """!Print the hierarchy of subtasks in a config to stdout

    @param[in] config: configuration to process (an lsst.pex.config.Config)
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)
    # report each subtask in sorted field-name order
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
781 
782 
class ConfigValueAction(argparse.Action):
    """!argparse action callback to override config parameters using name=value pairs from the command line
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Override one or more config name value pairs

        @param[in] parser           argument parser (instance of ArgumentParser)
        @param[in,out] namespace    parsed command (an instance of argparse.Namespace);
            updated value: namespace.config
        @param[in] values           a list of configItemName=value pairs
        @param[in] option_string    option value specified by the user (a str)
        """
        # without a config object there is nothing to override
        if namespace.config is None:
            return
        for nameValue in values:
            name, dummy, valueStr = nameValue.partition("=")
            if not valueStr:
                parser.error("%s value %s must be in form name=value" % (option_string, nameValue))

            # First attempt: assign the raw string; many config fields accept it directly.
            try:
                setDottedAttr(namespace.config, name, valueStr)
                continue
            except AttributeError:
                parser.error("no config field: %s" % (name,))
            except Exception:
                pass

            # The raw string was rejected; evaluate it as a Python expression and retry.
            try:
                value = eval(valueStr, {})
            except Exception:
                parser.error("cannot parse %r as a value for %s" % (valueStr, name))
            try:
                setDottedAttr(namespace.config, name, value)
            except Exception as e:
                parser.error("cannot set config.%s=%r: %s" % (name, value, e))
818 
819 
class ConfigFileAction(argparse.Action):
    """!argparse action to load config overrides from one or more files
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """!Load one or more files of config overrides

        @param[in] parser           argument parser (instance of ArgumentParser)
        @param[in,out] namespace    parsed command (an instance of argparse.Namespace);
            updated value: namespace.config
        @param[in] values           a list of config override file paths
        @param[in] option_string    option value specified by the user (a str)
        """
        config = namespace.config
        # no config object yet means there is nothing to apply the overrides to
        if config is None:
            return
        for configfile in values:
            try:
                config.load(configfile)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (configfile, e))
841 
842 
class IdValueAction(argparse.Action):
    """!argparse action callback to process a data ID into a dict
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Parse --id data and append results to namespace.<argument>.idList

        @param[in] parser           argument parser (instance of ArgumentParser)
        @param[in,out] namespace    parsed command (an instance of argparse.Namespace);
            updated value: <idName>.idList, where <idName> is the name of the
            ID argument, for instance "id" for ID argument --id
        @param[in] values           a list of data IDs; see data format below
        @param[in] option_string    option value specified by the user (a str)

        The data format is:
        key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        Each value (e.g. value1_1) may be a plain string, or a range of the form
        "int..int" (e.g. "1..3"), which is interpreted as "1^2^3" (inclusive,
        unlike a Python range). A stride may be appended: "1..5:2" is "1^3^5".
        So "0^2..4^7..9" is equivalent to "0^2^3^4^7^8^9".

        The cross product is computed for keys with multiple values. For example:
            --id visit 1^2 ccd 1,1^2,2
        results in the following data ID dicts being appended to
        namespace.<argument>.idList:
            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for pair in values:
            keyName, dummy, rawValue = pair.partition("=")
            if keyName in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (keyName, option_string))
            expanded = []
            for token in rawValue.split("^"):
                rangeMat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", token)
                if rangeMat is None:
                    expanded.append(token)
                else:
                    first = int(rangeMat.group(1))
                    last = int(rangeMat.group(2))
                    strideStr = rangeMat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    # ranges are inclusive, unlike Python's range
                    expanded.extend(str(val) for val in range(first, last + 1, stride))
            idDict[keyName] = expanded

        # cross product of all key values, preserving key order in each dict
        keyList = list(idDict.keys())
        idDictList = [collections.OrderedDict(zip(keyList, combo))
                      for combo in itertools.product(*idDict.values())]

        getattr(namespace, option_string.lstrip("-")).idList += idDictList
900 
901 
class LogLevelAction(argparse.Action):
    """!argparse action to set log level
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Set trace level

        @param[in] parser           argument parser (instance of ArgumentParser)
        @param[in] namespace        parsed command (an instance of argparse.Namespace); ignored
        @param[in] values           a list of trace levels; each item is either
            'component_name=level' or 'level', where level is one of
            TRACE, DEBUG, INFO, WARN, ERROR, FATAL (case insensitive)
        @param[in] option_string    option value specified by the user (a str)
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for componentLevel in values:
            component, dummy, levelStr = componentLevel.partition("=")
            if not levelStr:
                # a bare level with no component applies to the root log
                component, levelStr = None, component
            logLevelUpr = levelStr.upper()
            if logLevelUpr not in permittedLevelList:
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logLevel = getattr(lsstLog.Log, logLevelUpr)
            target = namespace.log if component is None else lsstLog.Log.getLogger(component)
            target.setLevel(logLevel)
931 
932 
def setDottedAttr(item, name, value):
    """!Like setattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in,out] item     object whose (possibly nested) attribute is to be set
    @param[in] name         dotted attribute name, e.g. "foo.bar.baz"
    @param[in] value        new value for the attribute

    For example if name is foo.bar.baz then item.foo.bar.baz is set to the specified value.
    """
    path = name.split(".")
    target = item
    # walk down to the object that owns the final attribute
    while len(path) > 1:
        target = getattr(target, path.pop(0))
    setattr(target, path[0], value)
947 
948 
def getDottedAttr(item, name):
    """!Like getattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in] item     object whose (possibly nested) attribute is to be returned
    @param[in] name     dotted attribute name, e.g. "foo.bar.baz"

    For example if name is foo.bar.baz then item.foo.bar.baz is returned.
    """
    head, dummy, rest = name.partition(".")
    value = getattr(item, head)
    # recurse until the dotted path is exhausted
    return getDottedAttr(value, rest) if rest else value
961 
962 
def dataExists(butler, datasetType, dataRef):
    """!Return True if data exists at the current level or any data exists at a deeper level, False otherwise

    @param[in] butler       data butler (a \ref lsst.daf.persistence.butler.Butler
        "lsst.daf.persistence.Butler")
    @param[in] datasetType  dataset type (a str)
    @param[in] dataRef      butler data reference (a \ref lsst.daf.persistence.butlerSubset.ButlerDataRef
        "lsst.daf.persistence.ButlerDataRef")
    """
    children = dataRef.subItems()
    if not children:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # non-leaf: true if any descendant has data (short-circuits on first hit)
    return any(dataExists(butler, datasetType, child) for child in children)
def setDatasetType
Set actual dataset type, once it is known.
def castDataIds
Validate data IDs and cast them to the correct type (modify idList in place).
argparse action to set log level
An argument parser for pipeline tasks that is based on argparse.ArgumentParser.
def dataExists
Return True if data exists at the current level or any data exists at a deeper level, False otherwise.
A dataset type specified by a command-line argument.
def showTaskHierarchy
Print task hierarchy to stdout.
Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument.
def setDottedAttr
Like setattr, but accepts hierarchical names, e.g.
def __call__
Parse --id data and append results to namespace.
argparse action callback to override config parameters using name=value pairs from the command line ...
def getTaskDict
Get a dictionary of task info for all subtasks in a config.
def add_id_argument
Add a data ID argument.
std::string getPackageDir(std::string const &packageName)
return the root directory of a setup package
Definition: Utils.cc:34
Definition: Log.h:716
def _applyInitialOverrides
Apply obs-package-specific and camera-specific config override files, if found.
def makeDataRefList
Compute refList based on idList.
def __init__
Construct a DatasetArgument.
def convert_arg_line_to_args
Allow files of arguments referenced by @<path> to contain multiple values on each line...
def getDottedAttr
Like getattr, but accepts hierarchical names, e.g.
def addArgument
Add a command-line argument to specify dataset type name.
def __call__
Load one or more files of config overrides.
def isDynamicDatasetType
Is the dataset type dynamic (specified on the command line)?
def __call__
Override one or more config name value pairs.
argparse action callback to process a data ID into a dict
def __init__
Construct a ConfigDatasetType.
def setLevel
Definition: log.py:71
def handleCamera
Perform camera-specific operations before parsing the command line.
def parse_args
Parse arguments for a pipeline task.
def _processDataIds
Process the parsed data for each data ID argument.
A dataset type specified by a config parameter.
def __init__
Construct an ArgumentParser.
def obeyShowArgument
Process arguments specified with --show (but ignores "data")
Abstract base class for a dataset type determined from parsed command-line arguments.
def _fixPath
Apply environment variable as default root, if present, and abspath.
def addArgument
Add a command-line argument to specify dataset type name, if wanted.
argparse action to load config overrides from one or more files
def getDatasetType
Return the dataset type as a string.
def __init__
Construct a DataIdContainer.
A container for data IDs and associated data references.