LSSTApplications  18.1.0
LSSTDataManagementBasePackage
butlerSubset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 # -*- python -*-
26 
27 """This module defines the ButlerSubset class and the ButlerDataRefs contained
28 within it as well as an iterator over the subset."""
29 from builtins import next
30 from builtins import range
31 from builtins import object
32 
33 from . import DataId
34 
35 
37 
38  """ButlerSubset is a container for ButlerDataRefs. It represents a
39  collection of data ids that can be used to obtain datasets of the type
40  used when creating the collection or a compatible dataset type. It can be
41  thought of as the result of a query for datasets matching a partial data
42  id.
43 
44  The ButlerDataRefs are generated at a specified level of the data id
45  hierarchy. If that is not the level at which datasets are specified, the
46  ButlerDataRef.subItems() method may be used to dive further into the
47  ButlerDataRefs.
48 
49  ButlerSubsets should generally be created using Butler.subset().
50 
51  This mechanism replaces the creation of butlers using partial dataIds.
52 
53  Public methods:
54 
55  __init__(self, butler, datasetType, level, dataId)
56 
57  __len__(self)
58 
59  __iter__(self)
60 
61  """
62 
63  GENERATION = 2
64  """This is a Generation 2 ButlerSubset.
65  """
66 
def __init__(self, butler, datasetType, level, dataId):
    """
    Create a ButlerSubset by querying a butler for data ids matching a
    given partial data id for a given dataset type at a given hierarchy
    level.

    The matching data ids are stored in self.cache; nothing is cached when
    the butler reports no keys (None) for the dataset type and level.

    @param butler (Butler) butler that is being queried.
    @param datasetType (str) the type of dataset to query.
    @param level (str) the hierarchy level to descend to. if empty string will look up the default
                       level.
    @param dataId (dict) the (partial or complete) data id.
    """
    self.butler = butler
    self.datasetType = datasetType
    self.dataId = DataId(dataId)
    self.cache = []
    self.level = level

    # Read the tag from the normalised DataId rather than from the raw
    # argument: a plain dict (the documented input type) has no `.tag`.
    # NOTE(review): assumes DataId(dataId) carries over the tag of an
    # incoming DataId -- confirm against the DataId implementation.
    keys = self.butler.getKeys(datasetType, level, tag=self.dataId.tag)
    if keys is None:
        return
    fmt = list(keys.keys())

    # Don't query if we already have a complete dataId
    if all(key in dataId for key in fmt):
        self.cache.append(dataId)
        return

    idTuples = butler.queryMetadata(self.datasetType, fmt, self.dataId)
    for idTuple in idTuples:
        tempId = dict(self.dataId)
        if len(fmt) == 1:
            # With a single key the query result is used as the value itself.
            tempId[fmt[0]] = idTuple
        else:
            for position, key in enumerate(fmt):
                tempId[key] = idTuple[position]
        self.cache.append(tempId)
109 
def __repr__(self):
    """Return a string listing the butler, dataset type, data id, cache and level."""
    fields = (self.butler, self.datasetType, self.dataId, self.cache, self.level)
    return "ButlerSubset(butler=%s, datasetType=%s, dataId=%s, cache=%s, level=%s)" % fields
113 
def __len__(self):
    """
    Count the data ids cached by this ButlerSubset (one per ButlerDataRef
    produced when iterating).

    @returns (int)
    """
    cachedIds = self.cache
    return len(cachedIds)
122 
def __iter__(self):
    """
    Start a new pass over the ButlerDataRefs in the ButlerSubset.

    @returns (ButlerSubsetIterator)
    """
    iterator = ButlerSubsetIterator(self)
    return iterator
131 
132 
134  """
135  An iterator over the ButlerDataRefs in a ButlerSubset.
136  """
137 
def __init__(self, butlerSubset):
    """
    Remember the ButlerSubset being traversed and open an iterator over
    its cached data ids.

    @param butlerSubset (ButlerSubset) the subset to iterate over.
    """
    self.butlerSubset = butlerSubset
    # Iterate the cache directly; the owning subset is kept alongside it.
    self.iter = iter(butlerSubset.cache)
141 
def __iter__(self):
    """Return this object itself, as the iterator protocol requires."""
    return self
144 
def __next__(self):
    """
    Wrap the next cached data id in a ButlerDataRef tied to the subset.

    StopIteration from the underlying iterator propagates unchanged.
    """
    upcomingId = next(self.iter)
    return ButlerDataRef(self.butlerSubset, upcomingId)
147 
148 
150  """
151  A ButlerDataRef is a reference to a potential dataset or group of datasets
152  that is portable between compatible dataset types. As such, it can be
153  used to create or retrieve datasets.
154 
155  ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
156  Butler.subset(). They are initially specific to the dataset type passed
157  to that call, but they may be used with any other compatible dataset type.
158  Dataset type compatibility must be determined externally (or by trial and
159  error).
160 
161  ButlerDataRefs may be created at any level of a data identifier hierarchy.
162  If the level is not one at which datasets exist, a ButlerSubset
163  with lower-level ButlerDataRefs can be created using
164  ButlerDataRef.subItems().
165 
166  Public methods:
167 
168  get(self, datasetType=None, **rest)
169 
170  put(self, obj, datasetType=None, **rest)
171 
172  subItems(self, level=None)
173 
174  datasetExists(self, datasetType=None, **rest)
175 
176  getButler(self)
177  """
178 
179  GENERATION = 2
180  """This is a Generation 2 DataRef.
181  """
182 
def __init__(self, butlerSubset, dataId):
    """
    Internal constructor; ButlerDataRefs are meant to be built only by
    ButlerSubset and ButlerSubsetIterator.

    @param butlerSubset (ButlerSubset) the subset this reference belongs to.
    @param dataId the data id this reference stands for.
    """
    self.butlerSubset, self.dataId = butlerSubset, dataId
191 
def __repr__(self):
    """Return a string showing the owning ButlerSubset and the data id."""
    template = 'ButlerDataRef(butlerSubset=%s, dataId=%s)'
    return template % (self.butlerSubset, self.dataId)
194 
def get(self, datasetType=None, **rest):
    """
    Fetch the dataset this ButlerDataRef points at, delegating to the
    butler of the owning ButlerSubset.

    @param datasetType (str) dataset type to retrieve; when None, the type
                             used to create the ButlerSubset is used.
    @param **rest keyword arguments with data identifiers
    @returns object corresponding to the given dataset type.
    """
    subset = self.butlerSubset
    chosenType = subset.datasetType if datasetType is None else datasetType
    return subset.butler.get(chosenType, self.dataId, **rest)
207 
def put(self, obj, datasetType=None, doBackup=False, **rest):
    """
    Store a dataset under the data id of this ButlerDataRef, delegating to
    the butler of the owning ButlerSubset.

    @param obj object to persist.
    @param datasetType (str) dataset type to persist; when None, the type
                             used to create the ButlerSubset is used.
    @param doBackup if True, rename existing instead of overwriting
    @param **rest keyword arguments with data identifiers

    WARNING: Setting doBackup=True is not safe for parallel processing, as it
    may be subject to race conditions.
    """
    subset = self.butlerSubset
    targetType = subset.datasetType if datasetType is None else datasetType
    subset.butler.put(obj, targetType, self.dataId, doBackup=doBackup, **rest)
225 
def getUri(self, datasetType=None, write=False, **rest):
    """Return the URI the butler reports for a dataset.

    .. warning:: This is intended only for debugging. The URI should
       never be used for anything other than printing.

    .. note:: In the event there are multiple URIs, we return only
       the first.

    .. note:: getUri() does not currently support composite datasets.

    Parameters
    ----------
    datasetType : `str`, optional
        The dataset type of interest; when omitted, the type used to
        create the owning ButlerSubset is used.
    write : `bool`, optional
        Return the URI for writing?
    rest : `dict`, optional
        Keyword arguments for the data id.

    Returns
    -------
    uri : `str`
        URI for dataset
    """
    subset = self.butlerSubset
    wantedType = subset.datasetType if datasetType is None else datasetType
    return subset.butler.getUri(wantedType, self.dataId, write=write, **rest)
255 
def subLevels(self):
    """
    Return the level keys that lie below this ButlerDataRef in the
    hierarchy: the keys the butler reports for the dataset type as a
    whole, minus the keys it reports at the ButlerSubset's level.

    @returns (iterable) set of strings with level keys."""
    subset = self.butlerSubset
    butler = subset.butler
    everyKey = butler.getKeys(subset.datasetType, tag=subset.dataId.tag).keys()
    keysAtLevel = butler.getKeys(subset.datasetType, subset.level, tag=subset.dataId.tag).keys()
    return set(everyKey) - set(keysAtLevel)
273 
def subItems(self, level=None):
    """
    Build a ButlerSubset one step further down the hierarchy, using this
    ButlerDataRef's data id as the partial data id. When level is None the
    default sublevel for the original ButlerSubset level is looked up from
    the mapper of every repository the butler knows about.

    As currently implemented, the default sublevels for all the
    repositories used by this Butler instance must match for the Butler to
    be able to select a default sublevel to get the subset.

    @param level (str) the hierarchy level to descend to.
    @returns (ButlerSubset) resulting from the lower-level query or () if
             there is no lower level.
    @throws RuntimeError if the repositories disagree on the default sublevel.
    """
    if level is None:
        # Collect the distinct defaults proposed by each repository's mapper.
        candidates = {
            repoData.repo._mapper.getDefaultSubLevel(self.butlerSubset.level)
            for repoData in self.butlerSubset.butler._repos.all()
        }
        if len(candidates) > 1:
            raise RuntimeError(
                "Support for multiple levels not implemented.")
        level = candidates.pop()
        if level is None:
            return ()
    subset = self.butlerSubset
    return subset.butler.subset(subset.datasetType, level, self.dataId)
303 
def datasetExists(self, datasetType=None, write=False, **rest):
    """
    Ask the butler of the owning ButlerSubset whether a dataset exists for
    this ButlerDataRef's data id.

    @param datasetType (str) dataset type to check; when None, the type
                             used to create the ButlerSubset is used.
    @param write (bool) if True, search only in output repositories
    @param **rest keywords arguments with data identifiers
    @returns bool
    """
    subset = self.butlerSubset
    checkedType = subset.datasetType if datasetType is None else datasetType
    return subset.butler.datasetExists(checkedType, self.dataId, write=write, **rest)
318 
def getButler(self):
    """
    Return the butler held by the ButlerSubset this data reference
    belongs to.
    """
    owningSubset = self.butlerSubset
    return owningSubset.butler
def __init__(self, butlerSubset, dataId)
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
daf::base::PropertySet * set
Definition: fits.cc:884
def __init__(self, butler, datasetType, level, dataId)
Definition: butlerSubset.py:67
def put(self, obj, datasetType=None, doBackup=False, rest)
def datasetExists(self, datasetType=None, write=False, rest)
def getUri(self, datasetType=None, write=False, rest)
def get(self, datasetType=None, rest)
daf::base::PropertyList * list
Definition: fits.cc:885