LSSTApplications  10.0-2-g4f67435,11.0.rc2+1,11.0.rc2+12,11.0.rc2+3,11.0.rc2+4,11.0.rc2+5,11.0.rc2+6,11.0.rc2+7,11.0.rc2+8
LSSTDataManagementBasePackage
butlerSubset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 # -*- python -*-
26 
27 """This module defines the ButlerSubset class and the ButlerDataRefs contained
28 within it as well as an iterator over the subset."""
29 
30 from __future__ import with_statement
31 
class ButlerSubset(object):
    """ButlerSubset is a container for ButlerDataRefs.  It represents a
    collection of data ids that can be used to obtain datasets of the type
    used when creating the collection or a compatible dataset type.  It can be
    thought of as the result of a query for datasets matching a partial data
    id.

    The ButlerDataRefs are generated at a specified level of the data id
    hierarchy.  If that is not the level at which datasets are specified, the
    ButlerDataRef.subItems() method may be used to dive further into the
    ButlerDataRefs.

    ButlerSubsets should generally be created using Butler.subset().

    This mechanism replaces the creation of butlers using partial dataIds.

    Public methods:

    __init__(self, butler, datasetType, level, dataId)

    __len__(self)

    __iter__(self)

    """

    def __init__(self, butler, datasetType, level, dataId):
        """
        Create a ButlerSubset by querying a butler for data ids matching a
        given partial data id for a given dataset type at a given hierarchy
        level.

        The matching data ids are stored, as dicts, in self.cache.

        @param butler (Butler)    butler that is being queried.
        @param datasetType (str)  the type of dataset to query.
        @param level (str)        the hierarchy level to descend to.
        @param dataId (dict)      the (partial or complete) data id.
        """

        self.butler = butler
        self.datasetType = datasetType
        self.level = level
        self.dataId = dataId
        self.cache = []

        # keys() is available on both Python 2 and Python 3 mappings, unlike
        # the Python-2-only iterkeys().
        fmt = list(self.butler.getKeys(datasetType, level).keys())

        # Don't query if we already have a complete dataId.  Note that the
        # caller's dict itself (not a copy) is stored in that case.
        if all(key in dataId for key in fmt):
            self.cache.append(dataId)
            return

        for idTuple in butler.queryMetadata(self.datasetType,
                                            level, fmt, self.dataId):
            # Start each result from a copy so the partial data id passed in
            # by the caller is never modified.
            tempId = dict(self.dataId)
            if len(fmt) == 1:
                # With a single key the query result is used as the value
                # itself rather than being indexed.
                tempId[fmt[0]] = idTuple
            else:
                for i, key in enumerate(fmt):
                    tempId[key] = idTuple[i]
            self.cache.append(tempId)

    def __len__(self):
        """
        Number of ButlerDataRefs in the ButlerSubset.

        @returns (int)
        """

        return len(self.cache)

    def __iter__(self):
        """
        Iterator over the ButlerDataRefs in the ButlerSubset.

        @returns (ButlerSubsetIterator)
        """

        return ButlerSubsetIterator(self)
116 
class ButlerSubsetIterator(object):
    """
    An iterator over the ButlerDataRefs in a ButlerSubset.

    Each step wraps the next data id from the subset's cache in a
    ButlerDataRef bound to that subset.
    """

    def __init__(self, butlerSubset):
        """
        @param butlerSubset (ButlerSubset)  the subset whose cached data ids
                                            are iterated over.
        """
        self.butlerSubset = butlerSubset
        self.iter = iter(butlerSubset.cache)

    def __iter__(self):
        """
        @returns (ButlerSubsetIterator) this iterator itself.
        """
        return self

    def __next__(self):
        """
        Return a ButlerDataRef for the next data id in the subset.

        Raises StopIteration once the cached data ids are exhausted.

        @returns (ButlerDataRef)
        """
        # Advance first so exhaustion surfaces as StopIteration before any
        # ButlerDataRef is constructed.
        dataId = next(self.iter)
        return ButlerDataRef(self.butlerSubset, dataId)

    # Python 2 spells the iterator protocol method "next"; keep it as an
    # alias so existing callers of .next() continue to work.
    next = __next__
131 
class ButlerDataRef(object):
    """
    A ButlerDataRef is a reference to a potential dataset or group of datasets
    that is portable between compatible dataset types.  As such, it can be
    used to create or retrieve datasets.

    ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
    Butler.subset().  They are initially specific to the dataset type passed
    to that call, but they may be used with any other compatible dataset type.
    Dataset type compatibility must be determined externally (or by trial and
    error).

    ButlerDataRefs may be created at any level of a data identifier hierarchy.
    If the level is not one at which datasets exist, a ButlerSubset
    with lower-level ButlerDataRefs can be created using
    ButlerDataRef.subItems().

    Public methods:

    get(self, datasetType=None, **rest)

    put(self, obj, datasetType=None, doBackup=False, **rest)

    subLevels(self)

    subItems(self, level=None)

    datasetExists(self, datasetType=None, **rest)

    getButler(self)
    """

    def __init__(self, butlerSubset, dataId):
        """
        For internal use only.  ButlerDataRefs should only be created by
        ButlerSubset and ButlerSubsetIterator.

        @param butlerSubset (ButlerSubset)  the subset this reference belongs to.
        @param dataId (dict)                the data id this reference stands for.
        """

        self.butlerSubset = butlerSubset
        self.dataId = dataId

    def get(self, datasetType=None, **rest):
        """
        Retrieve a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str)  dataset type to retrieve.
        @param **rest             keyword arguments with data identifiers
        @returns object corresponding to the given dataset type.
        """

        subset = self.butlerSubset
        if datasetType is None:
            datasetType = subset.datasetType
        return subset.butler.get(datasetType, self.dataId, **rest)

    def put(self, obj, datasetType=None, doBackup=False, **rest):
        """
        Persist a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param obj                object to persist.
        @param datasetType (str)  dataset type to persist.
        @param doBackup           if True, rename existing instead of overwriting
        @param **rest             keyword arguments with data identifiers

        WARNING: Setting doBackup=True is not safe for parallel processing, as it
        may be subject to race conditions.
        """

        subset = self.butlerSubset
        if datasetType is None:
            datasetType = subset.datasetType
        subset.butler.put(obj, datasetType, self.dataId,
                          doBackup=doBackup, **rest)

    def subLevels(self):
        """
        Return the levels of the hierarchy that are lower than this
        ButlerDataRef: the keys the butler reports for the dataset type as a
        whole, minus the keys it reports at this reference's level.

        @returns (set) level keys.
        """

        subset = self.butlerSubset
        allKeys = set(subset.butler.getKeys(subset.datasetType).keys())
        levelKeys = set(
            subset.butler.getKeys(subset.datasetType, subset.level).keys())
        return allKeys - levelKeys

    def subItems(self, level=None):
        """
        Generate a ButlerSubset at a lower level of the hierarchy than this
        ButlerDataRef, using it as a partial data id.  If level is None, a
        default lower level for the original ButlerSubset level and dataset
        type is used.

        @param level (str)   the hierarchy level to descend to.
        @returns (ButlerSubset) resulting from the lower-level query or () if
                                there is no lower level.
        """

        subset = self.butlerSubset
        if level is None:
            # Ask the mapper for the default level below the subset's level.
            level = subset.butler.mapper.getDefaultSubLevel(subset.level)
            if level is None:
                # No lower level: return an empty tuple so callers can still
                # iterate over the result.
                return ()
        return subset.butler.subset(subset.datasetType, level, self.dataId)

    def datasetExists(self, datasetType=None, **rest):
        """
        Determine if a dataset exists of the given type (or the type used when
        creating the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to check.
        @param **rest            keywords arguments with data identifiers
        @returns bool
        """

        subset = self.butlerSubset
        if datasetType is None:
            datasetType = subset.datasetType
        return subset.butler.datasetExists(datasetType, self.dataId, **rest)

    def getButler(self):
        """
        Return the butler associated with this data reference.
        """
        return self.butlerSubset.butler