LSSTApplications  20.0.0
LSSTDataManagementBasePackage
butlerSubset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 # -*- python -*-
26 
27 """This module defines the ButlerSubset class and the ButlerDataRefs contained
28 within it as well as an iterator over the subset."""
29 
30 from . import DataId
31 
32 
34 
class ButlerSubset:
    """ButlerSubset is a container for ButlerDataRefs.  It represents a
    collection of data ids that can be used to obtain datasets of the type
    used when creating the collection or a compatible dataset type.  It can be
    thought of as the result of a query for datasets matching a partial data
    id.

    The ButlerDataRefs are generated at a specified level of the data id
    hierarchy.  If that is not the level at which datasets are specified, the
    ButlerDataRef.subItems() method may be used to dive further into the
    ButlerDataRefs.

    ButlerSubsets should generally be created using Butler.subset().

    This mechanism replaces the creation of butlers using partial dataIds.

    Public methods:

    __init__(self, butler, datasetType, level, dataId)

    __len__(self)

    __iter__(self)

    """

    GENERATION = 2
    """This is a Generation 2 ButlerSubset.
    """

    def __init__(self, butler, datasetType, level, dataId):
        """
        Create a ButlerSubset by querying a butler for data ids matching a
        given partial data id for a given dataset type at a given hierarchy
        level.

        The matching data ids are stored in ``self.cache``; iterating over
        the subset wraps each of them in a ButlerDataRef.  If the butler
        reports no keys for the dataset type and level, the cache is left
        empty.

        @param butler (Butler) butler that is being queried.
        @param datasetType (str) the type of dataset to query.
        @param level (str) the hierarchy level to descend to. if empty string
                     will look up the default level.
        @param dataId (dict) the (partial or complete) data id.
        """
        self.butler = butler
        self.datasetType = datasetType
        self.dataId = DataId(dataId)
        self.cache = []
        self.level = level

        # The data id is documented as possibly being a plain dict, which has
        # no ``tag`` attribute.  Prefer the caller's tag when there is one and
        # otherwise use the tag of the DataId wrapper built above.
        tag = getattr(dataId, "tag", self.dataId.tag)
        keys = self.butler.getKeys(datasetType, level, tag=tag)
        if keys is None:
            return
        fmt = list(keys.keys())

        # Don't query if we already have a complete dataId.  The caller's
        # object is cached as-is (not a copy).
        if all(key in dataId for key in fmt):
            self.cache.append(dataId)
            return

        idTuples = butler.queryMetadata(self.datasetType, fmt, self.dataId)
        singleKey = len(fmt) == 1
        for idTuple in idTuples:
            tempId = dict(self.dataId)
            if singleKey:
                # With exactly one key the query entry is used directly as
                # the value instead of being indexed.
                tempId[fmt[0]] = idTuple
            else:
                for i, key in enumerate(fmt):
                    tempId[key] = idTuple[i]
            self.cache.append(tempId)

    def __repr__(self):
        return "ButlerSubset(butler=%s, datasetType=%s, dataId=%s, cache=%s, level=%s)" % (
            self.butler, self.datasetType, self.dataId, self.cache, self.level)

    def __len__(self):
        """
        Number of ButlerDataRefs in the ButlerSubset.

        @returns (int)
        """

        return len(self.cache)

    def __iter__(self):
        """
        Iterator over the ButlerDataRefs in the ButlerSubset.

        @returns (ButlerSubsetIterator)
        """

        return ButlerSubsetIterator(self)
128 
129 
class ButlerSubsetIterator:
    """
    An iterator over the ButlerDataRefs in a ButlerSubset.

    It walks the data ids stored in the subset's ``cache`` and wraps each
    one in a ButlerDataRef bound to that subset.
    """

    def __init__(self, butlerSubset):
        """
        @param butlerSubset (ButlerSubset) the subset whose cached data ids
                            are iterated over.
        """
        self.butlerSubset = butlerSubset
        self.iter = iter(butlerSubset.cache)

    def __iter__(self):
        # An iterator is its own iterable.
        return self

    def __next__(self):
        # next() raises StopIteration once the cached data ids run out,
        # which ends the iteration.
        return ButlerDataRef(self.butlerSubset, next(self.iter))
144 
145 
class ButlerDataRef:
    """
    A ButlerDataRef is a reference to a potential dataset or group of datasets
    that is portable between compatible dataset types.  As such, it can be
    used to create or retrieve datasets.

    ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
    Butler.subset().  They are initially specific to the dataset type passed
    to that call, but they may be used with any other compatible dataset type.
    Dataset type compatibility must be determined externally (or by trial and
    error).

    ButlerDataRefs may be created at any level of a data identifier hierarchy.
    If the level is not one at which datasets exist, a ButlerSubset
    with lower-level ButlerDataRefs can be created using
    ButlerDataRef.subItems().

    Public methods:

    get(self, datasetType=None, **rest)

    put(self, obj, datasetType=None, doBackup=False, **rest)

    getUri(self, datasetType=None, write=False, **rest)

    subLevels(self)

    subItems(self, level=None)

    datasetExists(self, datasetType=None, write=False, **rest)

    getButler(self)
    """

    GENERATION = 2
    """This is a Generation 2 DataRef.
    """

    def __init__(self, butlerSubset, dataId):
        """
        For internal use only.  ButlerDataRefs should only be created by
        ButlerSubset and ButlerSubsetIterator.

        @param butlerSubset (ButlerSubset) the subset this reference belongs
                            to; it supplies the butler and default dataset
                            type.
        @param dataId the data id this reference points at.
        """

        self.butlerSubset = butlerSubset
        self.dataId = dataId

    def __repr__(self):
        return 'ButlerDataRef(butlerSubset=%s, dataId=%s)' % (self.butlerSubset, self.dataId)

    def get(self, datasetType=None, **rest):
        """
        Retrieve a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to retrieve.
        @param **rest keyword arguments with data identifiers
        @returns object corresponding to the given dataset type.
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.get(datasetType, self.dataId, **rest)

    def put(self, obj, datasetType=None, doBackup=False, **rest):
        """
        Persist a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param obj object to persist.
        @param datasetType (str) dataset type to persist.
        @param doBackup if True, rename existing instead of overwriting
        @param **rest keyword arguments with data identifiers

        WARNING: Setting doBackup=True is not safe for parallel processing, as it
        may be subject to race conditions.
        """

        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        self.butlerSubset.butler.put(obj, datasetType, self.dataId, doBackup=doBackup, **rest)

    def getUri(self, datasetType=None, write=False, **rest):
        """Return the URI for a dataset

        .. warning:: This is intended only for debugging. The URI should
        never be used for anything other than printing.

        .. note:: In the event there are multiple URIs, we return only
        the first.

        .. note:: getUri() does not currently support composite datasets.

        Parameters
        ----------
        datasetType : `str`, optional
           The dataset type of interest.
        write : `bool`, optional
           Return the URI for writing?
        rest : `dict`, optional
           Keyword arguments for the data id.

        Returns
        -------
        uri : `str`
           URI for dataset
        """

        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.getUri(datasetType, self.dataId, write=write, **rest)

    def subLevels(self):
        """
        Return the levels of the hierarchy that are lower than this
        ButlerDataRef.

        These are the keys the butler reports for the dataset type as a
        whole, minus the keys it reports for the subset's own level.

        @returns (set) strings with level keys; empty if the butler reports
                 no keys for the dataset type."""

        subset = self.butlerSubset
        tag = subset.dataId.tag
        allKeys = subset.butler.getKeys(subset.datasetType, tag=tag)
        levelKeys = subset.butler.getKeys(subset.datasetType, subset.level, tag=tag)

        # getKeys() can return None (ButlerSubset.__init__ checks for it), so
        # treat a missing answer as "no keys" instead of failing on .keys().
        if allKeys is None:
            return set()
        if levelKeys is None:
            return set(allKeys.keys())
        return set(allKeys.keys()) - set(levelKeys.keys())

    def subItems(self, level=None):
        """
        Generate a ButlerSubset at a lower level of the hierarchy than this
        ButlerDataRef, using it as a partial data id.  If level is None, a
        default lower level for the original ButlerSubset level and dataset
        type is used.

        As currently implemented, the default sublevels for all the
        repositories used by this Butler instance must match for the Butler to
        be able to select a default sublevel to get the subset.

        @param level (str) the hierarchy level to descend to.
        @returns (ButlerSubset) resulting from the lower-level query or () if
                 there is no lower level.
        @throws RuntimeError if the repositories disagree on the default
                sublevel.
        """

        subset = self.butlerSubset
        if level is None:
            # Ask every repository's mapper for its default sublevel; they
            # all have to agree for a default to be usable.
            levelSet = {
                repoData.repo._mapper.getDefaultSubLevel(subset.level)
                for repoData in subset.butler._repos.all()
            }
            if len(levelSet) > 1:
                raise RuntimeError(
                    "Support for multiple levels not implemented.")
            # With no repositories there is no default to pick; report "no
            # lower level" rather than failing on an empty set.
            level = levelSet.pop() if levelSet else None
            if level is None:
                return ()
        return subset.butler.subset(subset.datasetType, level, self.dataId)

    def datasetExists(self, datasetType=None, write=False, **rest):
        """
        Determine if a dataset exists of the given type (or the type used when
        creating the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to check.
        @param write (bool) if True, search only in output repositories
        @param **rest keywords arguments with data identifiers
        @returns bool
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.datasetExists(
            datasetType, self.dataId, write=write, **rest)

    def getButler(self):
        """
        Return the butler associated with this data reference.
        """
        return self.butlerSubset.butler
lsst::daf::persistence.butlerSubset.ButlerSubset.__repr__
def __repr__(self)
Definition: butlerSubset.py:107
astshim.fitsChanContinued.next
def next(self)
Definition: fitsChanContinued.py:105
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator.butlerSubset
butlerSubset
Definition: butlerSubset.py:136
lsst::daf::persistence.butlerSubset.ButlerDataRef.subItems
def subItems(self, level=None)
Definition: butlerSubset.py:271
lsst::daf::persistence.butlerSubset.ButlerSubset.__iter__
def __iter__(self)
Definition: butlerSubset.py:120
lsst::daf::persistence.butlerSubset.ButlerSubset.__len__
def __len__(self)
Definition: butlerSubset.py:111
lsst::daf::persistence.butlerSubset.ButlerDataRef.getUri
def getUri(self, datasetType=None, write=False, **rest)
Definition: butlerSubset.py:223
ast::append
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator.iter
iter
Definition: butlerSubset.py:137
astshim.keyMap.keyMapContinued.keys
def keys(self)
Definition: keyMapContinued.py:6
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator.__next__
def __next__(self)
Definition: butlerSubset.py:142
lsst::daf::persistence.butlerSubset.ButlerDataRef.__init__
def __init__(self, butlerSubset, dataId)
Definition: butlerSubset.py:180
lsst::daf::persistence.butlerSubset.ButlerSubset.__init__
def __init__(self, butler, datasetType, level, dataId)
Definition: butlerSubset.py:64
lsst::daf::persistence.butlerSubset.ButlerDataRef.datasetExists
def datasetExists(self, datasetType=None, write=False, **rest)
Definition: butlerSubset.py:301
lsst::daf::persistence.butlerSubset.ButlerDataRef.dataId
dataId
Definition: butlerSubset.py:187
lsst::daf::persistence.butlerSubset.ButlerSubset.dataId
dataId
Definition: butlerSubset.py:78
lsst::daf::persistence.butlerSubset.ButlerDataRef.get
def get(self, datasetType=None, **rest)
Definition: butlerSubset.py:192
lsst::daf::persistence.butlerSubset.ButlerSubset.datasetType
datasetType
Definition: butlerSubset.py:77
lsst::daf::persistence.butlerSubset.ButlerDataRef.put
def put(self, obj, datasetType=None, doBackup=False, **rest)
Definition: butlerSubset.py:205
lsst::daf::persistence.butlerSubset.ButlerDataRef.subLevels
def subLevels(self)
Definition: butlerSubset.py:253
lsst::daf::persistence.butlerSubset.ButlerSubset.level
level
Definition: butlerSubset.py:80
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator.__iter__
def __iter__(self)
Definition: butlerSubset.py:139
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator.__init__
def __init__(self, butlerSubset)
Definition: butlerSubset.py:135
lsst::daf::persistence.butlerSubset.ButlerSubset.cache
cache
Definition: butlerSubset.py:79
list
daf::base::PropertyList * list
Definition: fits.cc:913
lsst::daf::persistence.butlerSubset.ButlerSubsetIterator
Definition: butlerSubset.py:130
lsst::daf::persistence.butlerSubset.ButlerSubset.butler
butler
Definition: butlerSubset.py:76
lsst::daf::persistence.butlerSubset.ButlerDataRef.getButler
def getButler(self)
Definition: butlerSubset.py:316
lsst::daf::persistence.butlerSubset.ButlerDataRef
Definition: butlerSubset.py:146
lsst::daf::persistence.butlerSubset.ButlerSubset
Definition: butlerSubset.py:33
lsst::daf::persistence.butlerSubset.ButlerDataRef.__repr__
def __repr__(self)
Definition: butlerSubset.py:189
set
daf::base::PropertySet * set
Definition: fits.cc:912
lsst::daf::persistence.butlerSubset.ButlerDataRef.butlerSubset
butlerSubset
Definition: butlerSubset.py:186
lsst::daf::persistence.dataId.DataId
Definition: dataId.py:29