LSSTApplications  16.0-10-g0ee56ad+5,16.0-11-ga33d1f2+5,16.0-12-g3ef5c14+3,16.0-12-g71e5ef5+18,16.0-12-gbdf3636+3,16.0-13-g118c103+3,16.0-13-g8f68b0a+3,16.0-15-gbf5c1cb+4,16.0-16-gfd17674+3,16.0-17-g7c01f5c+3,16.0-18-g0a50484+1,16.0-20-ga20f992+8,16.0-21-g0e05fd4+6,16.0-21-g15e2d33+4,16.0-22-g62d8060+4,16.0-22-g847a80f+4,16.0-25-gf00d9b8+1,16.0-28-g3990c221+4,16.0-3-gf928089+3,16.0-32-g88a4f23+5,16.0-34-gd7987ad+3,16.0-37-gc7333cb+2,16.0-4-g10fc685+2,16.0-4-g18f3627+26,16.0-4-g5f3a788+26,16.0-5-gaf5c3d7+4,16.0-5-gcc1f4bb+1,16.0-6-g3b92700+4,16.0-6-g4412fcd+3,16.0-6-g7235603+4,16.0-69-g2562ce1b+2,16.0-8-g14ebd58+4,16.0-8-g2df868b+1,16.0-8-g4cec79c+6,16.0-8-gadf6c7a+1,16.0-8-gfc7ad86,16.0-82-g59ec2a54a+1,16.0-9-g5400cdc+2,16.0-9-ge6233d7+5,master-g2880f2d8cf+3,v17.0.rc1
LSSTDataManagementBasePackage
repository.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from past.builtins import basestring
25 from builtins import object
26 
27 import copy
28 import inspect
29 import os
30 
31 from lsst.daf.persistence import Storage, listify, doImport, Policy
32 
33 
35 
36  """Arguments passed into a Butler that are used to instantiate a repository. This includes arguments that
37  can be used to create a new repository (cfgRoot, root, mapper, mapperArgs, policy) and are persisted along
38  with the new repository's configuration file. These arguments can also describe how a new or existing
39  repository is to be used (cfgRoot or root, tags, mode). When indicating an existing repository it is
40  better to not specify unnecessary arguments, as if they conflict with the persisted repository
41  configuration then a RuntimeError will be raised during Butler init.
42 
43  A RepositoryArgs class can be initialized from a dict, if the first argument to the initializer is a dict.
44 
45  Parameters
46  ----------
47  cfgRoot : URI or dict, optional
48  If dict, the initializer is re-called with the expanded dict.
49  If URI, this is the location where the RepositoryCfg should be found (existing repo) or put (new repo)
50  root : URI, optional
51  If different than cfgRoot then this is the location where the repository should exist. A RepositoryCfg
52  will be put at cfgRoot and its root will be a path to root.
53  mapper : string or class object, optional
54  The mapper to use with this repository. If string, should refer to an importable object. If class object,
55  should be a mapper to be instantiated by the Butler during Butler init.
56  mapperArgs : dict
57  Arguments & values to pass to the mapper when initializing it.
58  tags : list or object, optional
59  One or more unique identifiers to uniquely identify this repository and its parents when performing
60  Butler.get.
61  mode : string, optional
62  should be one of 'r', 'w', or 'rw', for 'read', 'write', or 'read-write'. Can be omitted; input
63  repositories will default to 'r', output repositories will default to 'w'. 'w' on an input repository
64  will raise a RuntimeError during Butler init, although 'rw' works and is equivalent to 'r'. Output
65  repositories may be 'w' or 'rw'; 'r' for an output repository will raise a RuntimeError during Butler
66  init.
67  policy : dict
68  Policy associated with this repository, overrides all other policy data (which may be loaded from
69  policies in derived packages).
70  """
def __init__(self, cfgRoot=None, root=None, mapper=None, mapperArgs=None, tags=None,
             mode=None, policy=None):
    """Store the repository arguments, expanding a mapping passed as ``cfgRoot``.

    Parameters
    ----------
    cfgRoot : URI or dict, optional
        If a mapping, it is expanded into the keyword arguments of this initializer and every
        other argument of this call is ignored. Otherwise, if truthy, trailing path separators
        are stripped and the value is passed through ``Storage.absolutePath`` together with the
        current working directory.
    root : URI, optional
        Handled like a non-mapping ``cfgRoot``.
    mapper : string or class object, optional
        Stored unchanged.
    mapperArgs : dict, optional
        Stored unchanged.
    tags : list or object, optional
        Passed through ``listify`` and stored as a set.
    mode : string, optional
        Stored unchanged.
    policy : dict, optional
        Wrapped in a ``Policy`` unless it is None.

    Raises
    ------
    TypeError
        If ``cfgRoot`` is a mapping that cannot be expanded into the arguments of this
        initializer (for example, it contains an unknown key).
    """
    # Dispatch on the argument itself instead of catching TypeError around ``**cfgRoot``.
    # Catching TypeError also swallowed genuine errors raised by the nested call (an unknown
    # key, an unhashable tag, ...) and then failed with an unrelated AttributeError when the
    # mapping was treated as a URI string. ``keys`` is what ``**`` itself requires, so any
    # object that could be expanded before is still expanded.
    if hasattr(cfgRoot, 'keys'):
        self.__init__(**cfgRoot)
        return
    self._root = Storage.absolutePath(os.getcwd(), root.rstrip(os.sep)) if root else root
    self._cfgRoot = Storage.absolutePath(os.getcwd(), cfgRoot.rstrip(os.sep)) if cfgRoot else cfgRoot
    self._mapper = mapper
    self.mapperArgs = mapperArgs
    self.tags = set(listify(tags))
    self.mode = mode
    self.policy = Policy(policy) if policy is not None else None
84 
def __repr__(self):
    """Return a constructor-like string listing the stored arguments.

    ``root`` is read through the ``root`` property, while ``cfgRoot`` is the raw stored value.
    """
    template = "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)"
    values = (
        self.__class__.__name__,
        self.root,
        self._cfgRoot,
        self._mapper,
        self.mapperArgs,
        self.tags,
        self.mode,
        self.policy,
    )
    return template % values
89 
@property
def mapper(self):
    """The mapper stored for this repository; may be None."""
    return self._mapper

@mapper.setter
def mapper(self, mapper):
    """Assign the mapper.

    Raises
    ------
    RuntimeError
        If a value other than None is assigned while the stored mapper is truthy.
    """
    # Clearing (None) is always allowed, and so is assigning while nothing is set.
    if mapper is None or not self._mapper:
        self._mapper = mapper
        return
    raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
99 
@property
def cfgRoot(self):
    """The stored cfgRoot, or the stored root when no cfgRoot was given (is None)."""
    own = self._cfgRoot
    if own is None:
        return self._root
    return own
103 
@property
def root(self):
    """The stored root, or the stored cfgRoot when no root was given (is None)."""
    own = self._root
    if own is None:
        return self._cfgRoot
    return own
107 
@staticmethod
def inputRepo(storage, tags=None):
    """Create a RepositoryArgs from a storage location and optional tags.

    Parameters
    ----------
    storage : URI or dict
        Passed to RepositoryArgs as ``cfgRoot``.
    tags : list or object, optional
        Passed to RepositoryArgs as ``tags``.

    Returns
    -------
    RepositoryArgs
        The newly created arguments object.
    """
    # Keyword arguments are required here: the second positional parameter of RepositoryArgs
    # is ``root``, so passing ``tags`` positionally stored the tags as the repository root.
    return RepositoryArgs(cfgRoot=storage, tags=tags)
111 
@staticmethod
def outputRepo(storage, mapper=None, mapperArgs=None, tags=None, mode=None):
    """Create a RepositoryArgs from a storage location, mapper settings, tags and mode.

    Parameters
    ----------
    storage : URI or dict
        Passed to RepositoryArgs as ``cfgRoot``.
    mapper : string or class object, optional
        Passed to RepositoryArgs as ``mapper``.
    mapperArgs : dict, optional
        Passed to RepositoryArgs as ``mapperArgs``.
    tags : list or object, optional
        Passed to RepositoryArgs as ``tags``.
    mode : string, optional
        Passed to RepositoryArgs as ``mode``.

    Returns
    -------
    RepositoryArgs
        The newly created arguments object.
    """
    # Keyword arguments are required here: RepositoryArgs takes ``root`` as its second
    # positional parameter, so positional passing shifted every value by one slot
    # (mapper -> root, mapperArgs -> mapper, tags -> mapperArgs, mode -> tags).
    return RepositoryArgs(cfgRoot=storage, mapper=mapper, mapperArgs=mapperArgs, tags=tags,
                          mode=mode)
115 
def tag(self, tag):
    """Add one tag, or every tag of an iterable, to ``self.tags``.

    Parameters
    ----------
    tag : string, iterable or object
        A string is added as a single tag. Any other value is first treated as an iterable of
        tags; if that raises TypeError the value itself is added as a single tag.
    """
    known = self.tags
    if isinstance(tag, basestring):
        # A string is iterable, but it must be stored whole rather than character by character.
        known.add(tag)
        return
    try:
        known.update(tag)
    except TypeError:
        known.add(tag)
125 
126 
127 class Repository(object):
128  """Represents a repository of persisted data and has methods to access that data.
129  """
130 
def __init__(self, repoData):
    """Set up storage and the mapper for the repository described by ``repoData``.

    Parameters
    ----------
    repoData : RepoData
        Object that contains the parameters with which to init the Repository.
    """
    cfg = repoData.cfg
    self._storage = Storage.makeFromURI(cfg.root)
    # The cfg is written out only when it is dirty, the repository is not a V1 repository and
    # the cfg origin is not 'nested'.
    mustPersistCfg = cfg.dirty and not repoData.isV1Repository and repoData.cfgOrigin != 'nested'
    if mustPersistCfg:
        self._storage.putRepositoryCfg(cfg, repoData.cfgRoot)
    self._mapperArgs = cfg.mapperArgs  # keep for reference in matchesArgs
    self._initMapper(repoData)
144 
def _initMapper(self, repoData):
    """Resolve the configured mapper and keep it in ``self._mapper``.

    The value of ``repoData.cfg.mapper`` is handled as follows:

    - a string is imported with ``doImport``;
    - a class (given directly or obtained from the import) is instantiated with the
      configured mapperArgs;
    - anything else is stored as it is.

    Parameters
    ----------
    repoData : RepoData
        The RepoData with the properties of this Repository.
    """
    resolved = repoData.cfg.mapper

    if isinstance(resolved, basestring):
        resolved = doImport(resolved)

    if inspect.isclass(resolved):
        configuredArgs = repoData.cfg.mapperArgs
        # Work on a copy so that the 'root' default is not written into the cfg's own mapperArgs.
        ctorArgs = {} if configuredArgs is None else copy.copy(configuredArgs)
        if 'root' not in ctorArgs:
            ctorArgs['root'] = repoData.cfg.root
        resolved = resolved(parentRegistry=repoData.parentRegistry,
                            repositoryCfg=repoData.cfg,
                            **ctorArgs)

    self._mapper = resolved
174 
175  # todo want a way to make a repository read-only
def write(self, butlerLocation, obj):
    """Write a dataset to Storage.

    The storage returned by ``butlerLocation.getStorage()`` is used when it is truthy;
    otherwise this repository's own storage is used.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        Contains the details needed to find the desired dataset.
    obj : object
        The dataset to be written.

    Returns
    -------
    object
        Whatever the selected storage's ``write`` returns.
    """
    target = butlerLocation.getStorage() or self._storage
    return target.write(butlerLocation, obj)
188 
def read(self, butlerLocation):
    """Read a dataset from Storage.

    The storage returned by ``butlerLocation.getStorage()`` is used when it is truthy;
    otherwise this repository's own storage is used.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        Contains the details needed to find the desired dataset.

    Returns
    -------
    object
        Whatever the selected storage's ``read`` returns for butlerLocation.
    """
    source = butlerLocation.getStorage() or self._storage
    return source.read(butlerLocation)
200 
201  #################
202  # Mapper Access #
203 
def mappers(self):
    """Return a one-element tuple holding this repository's mapper (which may be None)."""
    onlyMapper = self._mapper
    return (onlyMapper,)
206 
def getRegistry(self):
    """Get the registry from the mapper.

    Returns
    -------
    Registry or None
        The result of the mapper's ``getRegistry``, or None if this repository has no mapper.
    """
    mapper = self._mapper
    return None if mapper is None else mapper.getRegistry()
218 
def getKeys(self, *args, **kwargs):
    """Get the keys available in the repository/repositories.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the mapper's ``getKeys``.

    Returns
    -------
    dict or None
        The result of the mapper's ``getKeys`` (a dict of {key: valueType}), or None if this
        repository has no mapper.
    """
    # todo: getKeys is not in the mapper API
    mapper = self._mapper
    return None if mapper is None else mapper.getKeys(*args, **kwargs)
231 
def map(self, *args, **kwargs):
    """Find a butler location for the given arguments.

    See mapper.map for more information about args and kwargs.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the mapper's ``map``.

    Returns
    -------
    object or None
        The location produced by the mapper (typically a ButlerLocation) after this repository
        has been set on it, or None when the mapper's result is falsy.

    Raises
    ------
    RuntimeError
        If this repository has no mapper.
    """
    mapper = self._mapper
    if mapper is None:
        raise RuntimeError("No mapper assigned to Repository")
    found = mapper.map(*args, **kwargs)
    if found:
        # Let the location know which repository produced it.
        found.setRepository(self)
        return found
    return None
247 
def queryMetadata(self, *args, **kwargs):
    """Get possible values for keys given a partial data id.

    See mapper documentation for more explanation about queryMetadata.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the mapper's ``queryMetadata``.

    Returns
    -------
    object or None
        The result of the mapper's ``queryMetadata``; its type depends on the mapper being used
        but is typically a set that contains available values for the keys in the format input
        argument. None if this repository has no mapper.
    """
    mapper = self._mapper
    return None if mapper is None else mapper.queryMetadata(*args, **kwargs)
262 
def backup(self, *args, **kwargs):
    """Perform mapper.backup.

    See mapper.backup for more information about args and kwargs. Nothing is done when this
    repository has no mapper.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the mapper's ``backup``.

    Returns
    -------
    None
    """
    mapper = self._mapper
    if mapper is not None:
        mapper.backup(*args, **kwargs)
275 
def getMapperDefaultLevel(self):
    """Get the default level of the mapper.

    This is typically used if no level is passed into butler methods that call repository.getKeys
    and/or repository.queryMetadata. There is a bug in that code because it gets the default level
    from this repository but then uses that value when searching all repositories. If this and
    other repositories have dissimilar data, the default level value will be nonsensical. A good
    example of this issue is in Butler.subset; it needs refactoring.

    Returns
    -------
    object or None
        The result of the mapper's ``getDefaultLevel``, or None if this repository has no mapper.
    """
    mapper = self._mapper
    return None if mapper is None else mapper.getDefaultLevel()
290 
def exists(self, location):
    """Check if location exists in storage.

    The storage returned by ``location.getStorage()`` is asked when it is truthy; otherwise
    this repository's own storage is asked.

    Parameters
    ----------
    location : ButlerLocation
        Describes a location in storage to look for.

    Returns
    -------
    bool
        True if location exists, False if not.
    """
    store = location.getStorage() or self._storage
    return store.exists(location)
306  return butlerLocationStorage.exists(location)
307  else:
308  return self._storage.exists(location)