LSSTApplications  10.0+286,10.0+36,10.0+46,10.0-2-g4f67435,10.1+152,10.1+37,11.0,11.0+1,11.0-1-g47edd16,11.0-1-g60db491,11.0-1-g7418c06,11.0-2-g04d2804,11.0-2-g68503cd,11.0-2-g818369d,11.0-2-gb8b8ce7
LSSTDataManagementBasePackage
cudaLanczosWrapper.cc
Go to the documentation of this file.
1 // -*- LSST-C++ -*- // fixed format comment for emacs
2 
3 /*
4  * LSST Data Management System
5  * Copyright 2008 - 2012 LSST Corporation.
6  *
7  * This product includes software developed by the
8  * LSST Project (http://www.lsst.org/).
9  *
10  * This program is free software: you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation, either version 3 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the LSST License Statement and
21  * the GNU General Public License along with this program. If not,
22  * see <http://www.lsstcorp.org/LegalNotices/>.
23  */
24 
35 #ifdef GPU_BUILD
36 #include <cuda.h>
37 #include <cuda_runtime.h>
38 #endif
39 
40 #include "lsst/afw/geom/Box.h"
41 
42 #include "lsst/afw/math/Kernel.h"
44 #include "lsst/pex/logging/Trace.h"
46 #include "lsst/afw/image/Wcs.h"
47 
53 
56 
57 using namespace std;
61 
62 
63 namespace mathDetail = lsst::afw::math::detail;
64 namespace gpuDetail = lsst::afw::gpu::detail;
65 namespace afwMath = lsst::afw::math;
66 namespace afwGpu = lsst::afw::gpu;
67 namespace afwImage = lsst::afw::image;
68 namespace pexExcept = lsst::pex::exceptions;
69 namespace pexLog = lsst::pex::logging;
70 namespace afwGeom = lsst::afw::geom;
71 
72 namespace lsst {
73 namespace afw {
74 namespace math {
75 namespace detail {
76 
77 namespace {
78 
// Integer division of num by divisor, rounding the quotient upwards.
// (Assumes num + divisor - 1 does not overflow int, as in the original.)
int CeilDivide(int num, int divisor)
{
    int const biased = num + divisor - 1;  // bias so truncation rounds up
    return biased / divisor;
}
83 
// get the number of interpolation blocks given an image dimension:
// ceil(size / interpLength) full blocks, plus one extra block for the
// trailing boundary node of the interpolation mesh
int InterpBlkN(int size , int interpLength)
{
    return (size + interpLength - 1) / interpLength + 1;
}
89 
90 // calculate the interpolated value given the data for linear interpolation
// Fetches the bilinear-interpolation record for block (blkX, blkY) from the
// precomputed buffer and evaluates it at the within-block offset (subX, subY),
// returning the interpolated source-image position.
91 gpu::SPoint2 GetInterpolatedValue(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
92  int blkX, int blkY, int subX, int subY
93  )
94 {
95  gpu::BilinearInterp interp = interpBuf.Pixel(blkX, blkY);
96  return interp.Interpolate(subX, subY);
97 }
98 
99 // calculate the interpolated value given the data for linear interpolation
100 gpu::SPoint2 GetInterpolatedValue(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
101  int interpLen, int x, int y
102  )
103 {
104  int blkX = x / interpLen;
105  int blkY = y / interpLen;
106 
107  int subX = x % interpLen;
108  int subY = y % interpLen;
109 
110  return GetInterpolatedValue(interpBuf, blkX, blkY, subX, subY);
111 }
112 
113 // calculate the number of points falling within the srcGoodBox,
114 // given a bilinearly interpolated coordinate transform function on integer range [0,width> x [0, height>
// CPU-side mirror of the warping kernel's pixel accounting: walks every destination
// pixel, evaluates the interpolated source position, and counts how many land inside
// srcGoodBox. The count is returned to callers as numGoodPixels.
//
// subX/subY start at 1 (not 0) because the interpolation mesh nodes are placed at
// row/col == -1 (see warpImageGPU: row = rowBand * interpLength - 1), so destination
// pixel 0 sits at offset 1 within interpolation block 0.
115 int NumGoodPixels(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
116  int const interpLen, int const width, int const height, SBox2I srcGoodBox)
117 {
118  int cnt = 0;
119 
120  int subY = 1, blkY = 0;
121  for (int row = 0; row < height; row++, subY++) {
// advance to the next interpolation block row once the offset passes the block size
122  if (subY >= interpLen) {
123  subY -= interpLen;
124  blkY++;
125  }
126 
// restart at the leftmost block and cache this row's linear interpolator
127  int subX = 1, blkX = 0;
128  gpu::BilinearInterp interp = interpBuf.Pixel(blkX, blkY);
129  gpu::LinearInterp lineY = interp.GetLinearInterp(subY);
130 
131  for (int col = 0; col < width; col++, subX++) {
132  if (subX >= interpLen) {
133  subX -= interpLen;
134  blkX++;
// crossed into the next block column: refresh the cached interpolators
135  interp = interpBuf.Pixel(blkX, blkY);
136  lineY = interp.GetLinearInterp(subY);
137  }
138  gpu::SPoint2 srcPos = lineY.Interpolate(subX);
139  if (srcGoodBox.isInsideBox(srcPos)) {
140  cnt++;
141  }
142  }
143  }
144  return cnt;
145 }
146 
147 #ifdef GPU_BUILD
148 // for (plain) Image::
149 // allocate CPU and GPU buffers, transfer data and call GPU kernel proxy
150 // precondition: order*2 < gpu::SIZE_MAX_WARPING_KERNEL
// Returns the number of destination pixels whose interpolated source position falls
// inside srcBox (computed on the CPU by NumGoodPixels, not read back from the GPU).
//
// NOTE(review): this listing was extracted from generated documentation and some
// original source lines are missing; the code below is kept verbatim, with the gaps
// flagged where the surrounding code makes them evident.
151 template< typename DestPixelT, typename SrcPixelT>
152 int WarpImageGpuWrapper(
153  afwImage::Image<DestPixelT> &destImage,
154  afwImage::Image<SrcPixelT> const &srcImage,
155  int mainKernelSize,
156  gpu::KernelType maskKernelType,
157  int maskKernelSize,
158  const lsst::afw::geom::Box2I srcBox,
// NOTE(review): original line 159 is missing here — the body reads a srcPosInterp
// buffer (see Transfer and NumGoodPixels calls below), so a
// GpuBuffer2D<gpu::BilinearInterp> const& srcPosInterp parameter presumably belongs
// in this list; confirm against version control.
160  int const interpLength,
// NOTE(review): original line 161 is missing — the body reads padValue below, so a
// SinglePixel padValue parameter presumably belongs here; confirm against VCS.
162 )
163 {
164  typedef typename afwImage::Image<DestPixelT> DestImageT;
165 
// edge (out-of-range) destination pixels are filled with padValue
166  typename DestImageT::SinglePixel const edgePixel = padValue;
167 
// plain Image has no variance/mask planes, so those fields get sentinel -1
168  gpu::PixelIVM<DestPixelT> edgePixelGpu;
169  edgePixelGpu.img = edgePixel;
170  edgePixelGpu.var = -1;
171  edgePixelGpu.msk = -1;
172 
173  int const destWidth = destImage.getWidth();
174  int const destHeight = destImage.getHeight();
// RAII owners for the GPU allocations (freed when they go out of scope, including on throw)
175  gpuDetail::GpuMemOwner<DestPixelT> destBufImgGpu;
176  gpuDetail::GpuMemOwner<SrcPixelT> srcBufImgGpu;
177  gpuDetail::GpuMemOwner<gpu::BilinearInterp> srcPosInterpGpu;
178 
// output buffer: allocated on the GPU but not initialized from the host
179  gpu::ImageDataPtr<DestPixelT> destImgGpu;
180  destImgGpu.strideImg = destBufImgGpu.AllocImageBaseBuffer(destImage);
181  if (destBufImgGpu.ptr == NULL) {
182  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output image");
183  }
184  destImgGpu.img = destBufImgGpu.ptr;
185  destImgGpu.var = NULL;
186  destImgGpu.msk = NULL;
187  destImgGpu.width = destWidth;
188  destImgGpu.height = destHeight;
189 
// input buffer: allocated and copied host -> device
190  gpu::ImageDataPtr<SrcPixelT> srcImgGpu;
191  srcImgGpu.strideImg = srcBufImgGpu.TransferFromImageBase(srcImage);
192  if (srcBufImgGpu.ptr == NULL) {
193  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input image");
194  }
195  srcImgGpu.img = srcBufImgGpu.ptr;
196  srcImgGpu.var = NULL;
197  srcImgGpu.msk = NULL;
198  srcImgGpu.width = srcImage.getWidth();
199  srcImgGpu.height = srcImage.getHeight();
200 
// upload precomputed bilinear-interpolation data for the coordinate transform
201  srcPosInterpGpu.Transfer(srcPosInterp);
202  if (srcPosInterpGpu.ptr == NULL) {
203  throw LSST_EXCEPT(afwGpu::GpuMemoryError,
204  "Not enough memory on GPU for interpolation data for coorinate transformation");
205  }
206 
// convert inclusive Box2I bounds to the half-open [beg, end) convention of SBox2I
207  SBox2I srcBoxConv(srcBox.getMinX(), srcBox.getMinY(), srcBox.getMaxX() + 1, srcBox.getMaxY() + 1);
208 
// NOTE(review): the opening line of this call (original line 209) is missing from the
// extraction; given its declaration, this is presumably
// gpu::WarpImageGpuCallKernel(false /*isMaskedImage*/, ... — confirm against VCS.
210  destImgGpu, srcImgGpu,
211  mainKernelSize,
212  maskKernelType,
213  maskKernelSize,
214  srcBoxConv,
215  edgePixelGpu,
216  srcPosInterpGpu.ptr, interpLength
217  );
218 
// good-pixel count is computed on the CPU while the GPU kernel runs
219  int numGoodPixels = NumGoodPixels(srcPosInterp, interpLength, destWidth, destHeight, srcBoxConv);
220 
// NOTE(review): cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
// modern equivalent. Kernel errors are only detected here, after the sync.
221  cudaThreadSynchronize();
222  if (cudaGetLastError() != cudaSuccess) {
223  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "GPU calculation failed to run");
224  }
225 
// copy the warped result device -> host into the caller's image
226  destBufImgGpu.CopyToImageBase(destImage);
227  return numGoodPixels;
228 }
229 
230 // for MaskedImage::
231 // allocate CPU and GPU buffers, transfer data and call GPU kernel proxy
232 // precondition: order*2 < gpu::SIZE_MAX_WARPING_KERNEL
// MaskedImage overload: transfers the image, variance and mask planes separately,
// launches the warping kernel, and copies all three planes back. Returns the
// CPU-computed count of destination pixels whose source position lies in srcBox.
//
// NOTE(review): this listing was extracted from generated documentation and some
// original source lines are missing; the code is kept verbatim with gaps flagged.
233 template< typename DestPixelT, typename SrcPixelT>
234 int WarpImageGpuWrapper(
// NOTE(review): original line 235 is missing — the body uses dstImage throughout,
// so an afwImage::MaskedImage<DestPixelT> &dstImage parameter presumably belongs
// here; confirm against version control.
236  afwImage::MaskedImage<SrcPixelT>const &srcImage,
237  int mainKernelSize,
238  gpu::KernelType maskKernelType,
239  int maskKernelSize,
240  const lsst::afw::geom::Box2I srcBox,
// NOTE(review): original line 241 is missing — presumably the srcPosInterp buffer
// parameter (used by Transfer and NumGoodPixels below); confirm against VCS.
242  int const interpLength,
// NOTE(review): original line 243 is missing — presumably the padValue parameter
// (read on the edgePixel line below); confirm against VCS.
244 )
245 {
246  typedef typename afwImage::MaskedImage<DestPixelT> DestImageT;
247 
// edge (out-of-range) destination pixels are filled with padValue
248  typename DestImageT::SinglePixel const edgePixel = padValue;
249 
// unpack the MaskedImage pixel into the plain struct the GPU kernel consumes
250  gpu::PixelIVM<DestPixelT> edgePixelGpu;
251  edgePixelGpu.img = edgePixel.image();
252  edgePixelGpu.var = edgePixel.variance();
253  edgePixelGpu.msk = edgePixel.mask();
254 
255  int const destWidth = dstImage.getWidth();
256  int const destHeight = dstImage.getHeight();
257 
// RAII owners for every GPU allocation (released on scope exit, including on throw)
258  gpuDetail::GpuMemOwner<DestPixelT> destBufImgGpu;
259  gpuDetail::GpuMemOwner<gpu::VarPixel> destBufVarGpu;
260  gpuDetail::GpuMemOwner<gpu::MskPixel> destBufMskGpu;
261 
262  gpuDetail::GpuMemOwner<SrcPixelT> srcBufImgGpu;
263  gpuDetail::GpuMemOwner<gpu::VarPixel> srcBufVarGpu;
264  gpuDetail::GpuMemOwner<gpu::MskPixel> srcBufMskGpu;
265 
266  gpuDetail::GpuMemOwner<gpu::BilinearInterp> srcPosInterpGpu;
267 
// output buffers: allocated on the GPU, one per plane, not host-initialized
268  mathDetail::gpu::ImageDataPtr<DestPixelT> destImgGpu;
269  destImgGpu.strideImg = destBufImgGpu.AllocImageBaseBuffer(*dstImage.getImage());
270  destImgGpu.strideVar = destBufVarGpu.AllocImageBaseBuffer(*dstImage.getVariance());
271  destImgGpu.strideMsk = destBufMskGpu.AllocImageBaseBuffer(*dstImage.getMask());
272  if (destBufImgGpu.ptr == NULL) {
273  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output image");
274  }
275  if (destBufVarGpu.ptr == NULL) {
276  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output variance");
277  }
278  if (destBufMskGpu.ptr == NULL) {
279  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output mask");
280  }
281  destImgGpu.img = destBufImgGpu.ptr;
282  destImgGpu.var = destBufVarGpu.ptr;
283  destImgGpu.msk = destBufMskGpu.ptr;
284  destImgGpu.width = destWidth;
285  destImgGpu.height = destHeight;
286 
// input buffers: allocated and copied host -> device, one per plane
287  gpu::ImageDataPtr<SrcPixelT> srcImgGpu;
288  srcImgGpu.strideImg = srcBufImgGpu.TransferFromImageBase(*srcImage.getImage());
289  if (srcBufImgGpu.ptr == NULL) {
290  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input image");
291  }
292  srcImgGpu.strideVar = srcBufVarGpu.TransferFromImageBase(*srcImage.getVariance());
293  if (srcBufVarGpu.ptr == NULL) {
294  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input variance");
295  }
296  srcImgGpu.strideMsk = srcBufMskGpu.TransferFromImageBase(*srcImage.getMask());
297  if (srcBufMskGpu.ptr == NULL) {
298  throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input mask");
299  }
300 
301  srcImgGpu.img = srcBufImgGpu.ptr;
302  srcImgGpu.var = srcBufVarGpu.ptr;
303  srcImgGpu.msk = srcBufMskGpu.ptr;
304  srcImgGpu.width = srcImage.getWidth();
305  srcImgGpu.height = srcImage.getHeight();
306 
// upload precomputed bilinear-interpolation data for the coordinate transform
307  srcPosInterpGpu.Transfer(srcPosInterp);
308  if (srcPosInterpGpu.ptr == NULL) {
309  throw LSST_EXCEPT(afwGpu::GpuMemoryError,
310  "Not enough memory on GPU for interpolation data for coorinate transformation");
311  }
312 
// convert inclusive Box2I bounds to the half-open [beg, end) convention of SBox2I
313  SBox2I srcBoxConv(srcBox.getMinX(), srcBox.getMinY(), srcBox.getMaxX() + 1, srcBox.getMaxY() + 1);
314 
// NOTE(review): the opening line of this call (original line 315) is missing from the
// extraction; given its declaration, this is presumably
// gpu::WarpImageGpuCallKernel(true /*isMaskedImage*/, ... — confirm against VCS.
316  destImgGpu, srcImgGpu,
317  mainKernelSize,
318  maskKernelType,
319  maskKernelSize,
320  srcBoxConv,
321  edgePixelGpu,
322  srcPosInterpGpu.ptr, interpLength
323  );
// good-pixel count computed on the CPU while the GPU kernel runs
324  int numGoodPixels = NumGoodPixels(srcPosInterp, interpLength, destWidth, destHeight, srcBoxConv);
325 
// NOTE(review): cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
// modern equivalent. Kernel errors are only detected here, after the sync.
326  cudaThreadSynchronize();
327  if (cudaGetLastError() != cudaSuccess) {
328  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "GPU calculation failed to run");
329  }
330 
// copy all three warped planes device -> host into the caller's MaskedImage
331  destBufImgGpu.CopyToImageBase(*dstImage.getImage());
332  destBufVarGpu.CopyToImageBase(*dstImage.getVariance());
333  destBufMskGpu.CopyToImageBase(*dstImage.getMask());
334 
335  return numGoodPixels;
336 }
337 #endif //GPU_BUILD
338 
339 // Calculate bilinear interpolation data based on given function values
340 // input:
341 // srcPosInterp - contains values of original function at a mesh of equally distanced points
342 // the values are stored in .o member
343 // interpLength - distance between points
344 // destWidth, destHeight - size of function domain
345 // output:
346 // srcPosInterp - all members are calculated and set, ready to calculate interpolation values
// For each mesh cell, fills in the per-block linear coefficients (deltaY, d0X, ddX)
// from the four corner values p11/p12/p21/p22 so that intermediate positions can be
// evaluated by bilinear interpolation. The last row and column of blocks lie on the
// image boundary and may be narrower than interpLength, hence the shortened-step
// divisors invInterpLenRow/invInterpLenCol.
347 void CalculateInterpolationData(gpuDetail::GpuBuffer2D<gpu::BilinearInterp>& srcPosInterp, int interpLength,
348  int destWidth, int destHeight)
349 {
350  int const interpBlkNX = InterpBlkN(destWidth , interpLength);
351  int const interpBlkNY = InterpBlkN(destHeight, interpLength);
352 
// row/col track the pixel position of the block's origin node (nodes sit at -1, see warpImageGPU)
353  for (int row = -1, rowBand = 0; rowBand < interpBlkNY - 1; row += interpLength, rowBand++) {
354  double const invInterpLen = 1.0 / interpLength;
// use the actual (possibly shortened) vertical step for blocks touching the bottom edge
355  double const invInterpLenRow = row + interpLength <= destHeight - 1 ?
356  invInterpLen : 1.0 / (destHeight - 1 - row);
357 
358  for (int col = -1, colBand = 0; colBand < interpBlkNX - 1; col += interpLength, colBand++) {
359 
// corner values of the transform at the four surrounding mesh nodes
// (p11 = top-left, p12 = top-right, p21 = bottom-left, p22 = bottom-right)
360  const SPoint2 p11 = srcPosInterp.Pixel(colBand , rowBand ).o;
361  const SPoint2 p12 = srcPosInterp.Pixel(colBand + 1, rowBand ).o;
362  const SPoint2 p21 = srcPosInterp.Pixel(colBand , rowBand + 1).o;
363  const SPoint2 p22 = srcPosInterp.Pixel(colBand + 1, rowBand + 1).o;
// differences along each edge of the cell
364  const SVec2 band_dY = SVec2(p11, p21);
365  const SVec2 band_d0X = SVec2(p11, p12);
366  const SVec2 band_d1X = SVec2(p21, p22);
// rate of change of the horizontal step as we move down the cell
367  const SVec2 band_ddX = VecMul( VecSub(band_d1X, band_d0X), invInterpLenRow);
368 
// use the actual (possibly shortened) horizontal step for blocks touching the right edge
369  double const invInterpLenCol = col + interpLength <= destWidth - 1 ?
370  invInterpLen : 1.0 / (destWidth - 1 - col);
371 
372  gpu::BilinearInterp lin = srcPosInterp.Pixel(colBand, rowBand); //sets lin.o
373  lin.deltaY = VecMul(band_dY , invInterpLenRow);
374  lin.d0X = VecMul(band_d0X, invInterpLenCol);
375  lin.ddX = VecMul(band_ddX, invInterpLenCol);
376  srcPosInterp.Pixel(colBand, rowBand) = lin;
377 
378  // partially fill the last column and row, too
379  if (colBand == interpBlkNX - 2) {
380  srcPosInterp.Pixel(interpBlkNX - 1, rowBand).deltaY =
381  VecMul( SVec2(p12, p22), invInterpLenRow);
382  }
383  if (rowBand == interpBlkNY - 2) {
384  srcPosInterp.Pixel(colBand, interpBlkNY - 1).d0X =
385  VecMul( SVec2(p21, p22), invInterpLenCol);
386  }
387  }
388  }
389 }
390 
391 } //local namespace ends
392 
393 // a part of public interface, see header file for description
// Orchestrates GPU-accelerated image warping: validates parameters, selects a CUDA
// device, evaluates the coordinate transform at a coarse mesh of points, builds
// bilinear-interpolation data for it, and dispatches to WarpImageGpuWrapper.
// Returns (numGoodPixels, OK) on success, or (-1, <reason>) when processing is
// declined (no GPU, kernel too large, interpolation length too small).
394 template<typename DestImageT, typename SrcImageT>
395 std::pair<int, WarpImageGpuStatus::ReturnCode> warpImageGPU(
396  DestImageT &destImage,
397  SrcImageT const &srcImage,
398  afwMath::LanczosWarpingKernel const &lanczosKernel,
399  lsst::afw::math::SeparableKernel const &maskWarpingKernel,
400  PositionFunctor const &computeSrcPos,
401  int const interpLength,
402  typename DestImageT::SinglePixel padValue,
403  const bool forceProcessing
404 )
405 {
// GPU path requires interpolation of the coordinate transform
406  if (interpLength < 1) {
407  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
408  "GPU accelerated warping must use interpolation");
409  }
410 
411  int const srcWidth = srcImage.getWidth();
412  int const srcHeight = srcImage.getHeight();
413  pexLog::TTrace<3>("lsst.afw.math.warp", "(GPU) source image width=%d; height=%d", srcWidth, srcHeight);
414 
// NOTE(review): original line 415 is missing from this extraction — the orphaned
// throw/brace below presumably belongs to an `if (!afwGpu::isGpuBuild()) {` guard;
// confirm against version control.
416  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
417  }
418 
419 #ifdef GPU_BUILD
// map the mask warping kernel's dynamic type onto the GPU-side kernel enum
420  gpu::KernelType maskKernelType;
421  {
422  if (dynamic_cast<afwMath::LanczosWarpingKernel const*>(&maskWarpingKernel)) {
423  maskKernelType = gpu::KERNEL_TYPE_LANCZOS;
424  } else if (dynamic_cast<afwMath::BilinearWarpingKernel const*>(&maskWarpingKernel)) {
425  maskKernelType = gpu::KERNEL_TYPE_BILINEAR;
426  } else if (dynamic_cast<afwMath::NearestWarpingKernel const*>(&maskWarpingKernel)) {
427  maskKernelType = gpu::KERNEL_TYPE_NEAREST_NEIGHBOR;
428  } else {
429  throw LSST_EXCEPT(pexExcept::InvalidParameterError, "unknown type of mask warping kernel");
430  }
431  }
432 #endif
433 
// when processing is not forced, quietly decline rather than throw if no device is usable
434  if (gpuDetail::TryToSelectCudaDevice(!forceProcessing) == false) {
435  return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::NO_GPU);
436  }
437 
438  int const mainKernelSize = 2 * lanczosKernel.getOrder();
439  //do not process if the kernel is too large for allocated GPU local memory
440  if (mainKernelSize * 2 > gpu::SIZE_MAX_WARPING_KERNEL) {
441  return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::KERNEL_TOO_LARGE);
442  }
443 
444  //do not process if the interpolation data is too large to make any speed gains
445  if (!forceProcessing && interpLength < 3) {
446  return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::INTERP_LEN_TOO_SMALL);
447  }
448 
449  int const destWidth = destImage.getWidth();
450  int const destHeight = destImage.getHeight();
451  pexLog::TTrace<3>("lsst.afw.math.warp", "(GPU) remap image width=%d; height=%d", destWidth, destHeight);
452 
453  int const maxCol = destWidth - 1;
454  int const maxRow = destHeight - 1;
455 
456 #ifdef GPU_BUILD
457  // Compute borders; use to prevent applying kernel outside of srcImage
458  afwGeom::Box2I srcGoodBBox = lanczosKernel.shrinkBBox(srcImage.getBBox(afwImage::LOCAL));
459 #endif
460 
461  int const interpBlkNX = InterpBlkN(destWidth , interpLength);
462  int const interpBlkNY = InterpBlkN(destHeight, interpLength);
463  //GPU kernel input, will contain: for each interpolation block, all interpolation parameters
464  gpuDetail::GpuBuffer2D<gpu::BilinearInterp> srcPosInterp(interpBlkNX, interpBlkNY);
465 
466  // calculate values of coordinate transform function
// mesh nodes sit at pixel -1 plus multiples of interpLength, clamped to the image edge;
// positions are shifted to source-local (xy0-relative) coordinates for the GPU kernel
467  for (int rowBand = 0; rowBand < interpBlkNY; rowBand++) {
468  int row = min(maxRow, (rowBand * interpLength - 1));
469  for (int colBand = 0; colBand < interpBlkNX; colBand++) {
470  int col = min(maxCol, (colBand * interpLength - 1));
471  afwGeom::Point2D srcPos = computeSrcPos(col, row);
472  SPoint2 sSrcPos(srcPos);
473  sSrcPos = MovePoint(sSrcPos, SVec2(-srcImage.getX0(), -srcImage.getY0()));
474  srcPosInterp.Pixel(colBand, rowBand).o = sSrcPos;
475  }
476  }
477 
478  CalculateInterpolationData(/*in,out*/srcPosInterp, interpLength, destWidth, destHeight);
479 
480  int numGoodPixels = 0;
481 
482  pexLog::TTrace<3>("lsst.afw.math.warp", "using GPU acceleration, remapping masked image");
483 
484 #ifdef GPU_BUILD
// the mask plane may be warped with a cheaper kernel than the image plane
485  int maskKernelSize;
486  if (maskKernelType == gpu::KERNEL_TYPE_LANCZOS) {
487  maskKernelSize = 2 * dynamic_cast<afwMath::LanczosWarpingKernel const*>(&maskWarpingKernel)->getOrder();
488  } else {
489  maskKernelSize = 2;
490  }
491  numGoodPixels = WarpImageGpuWrapper(destImage,
492  srcImage,
493  mainKernelSize,
494  maskKernelType,
495  maskKernelSize,
496  srcGoodBBox,
497  srcPosInterp, interpLength, padValue
498  );
499 #endif
500  return std::pair<int, WarpImageGpuStatus::ReturnCode>(numGoodPixels, WarpImageGpuStatus::OK);
501 }
502 
503 //
504 // Explicit instantiations
505 //
// Convenience macros expanding to the concrete Image / MaskedImage template types.
507 #define MASKEDIMAGE(PIXTYPE) afwImage::MaskedImage<PIXTYPE, afwImage::MaskPixel, afwImage::VariancePixel>
508 #define IMAGE(PIXTYPE) afwImage::Image<PIXTYPE>
509 #define NL /* */
510 
// Instantiates warpImageGPU for one (destination pixel, source pixel) type pair,
// once for plain Image and once for MaskedImage.
511 #define INSTANTIATE(DESTIMAGEPIXELT, SRCIMAGEPIXELT) \
512  template std::pair<int,WarpImageGpuStatus::ReturnCode> warpImageGPU( \
513  IMAGE(DESTIMAGEPIXELT) &destImage, \
514  IMAGE(SRCIMAGEPIXELT) const &srcImage, \
515  afwMath::LanczosWarpingKernel const &warpingKernel, \
516  afwMath::SeparableKernel const &maskWarpingKernel, \
517  PositionFunctor const &computeSrcPos, \
518  int const interpLength, \
519  IMAGE(DESTIMAGEPIXELT)::SinglePixel padValue, \
520  const bool forceProcessing); NL \
521  template std::pair<int,WarpImageGpuStatus::ReturnCode> warpImageGPU( \
522  MASKEDIMAGE(DESTIMAGEPIXELT) &destImage, \
523  MASKEDIMAGE(SRCIMAGEPIXELT) const &srcImage, \
524  afwMath::LanczosWarpingKernel const &warpingKernel, \
525  afwMath::SeparableKernel const &maskWarpingKernel, \
526  PositionFunctor const &computeSrcPos, \
527  int const interpLength, \
528  MASKEDIMAGE(DESTIMAGEPIXELT)::SinglePixel padValue, \
529  const bool forceProcessing);
530 
// Only widening (or same-type) pixel conversions are instantiated.
531 INSTANTIATE(double, double)
532 INSTANTIATE(double, float)
533 INSTANTIATE(double, int)
534 INSTANTIATE(double, boost::uint16_t)
535 INSTANTIATE(float, float)
536 INSTANTIATE(float, int)
537 INSTANTIATE(float, boost::uint16_t)
538 INSTANTIATE(int, int)
539 INSTANTIATE(boost::uint16_t, boost::uint16_t)
541 
542 }
543 }
544 }
545 } //namespace lsst::afw::math::detail ends
int y
GPU accelerated image warping.
int getMaxY() const
Definition: Box.h:129
Declare the Kernel class and subclasses.
Class for representing an image or 2D array in general
Definition: GpuBuffer2D.h:54
SPoint2 MovePoint(SPoint2 p, SVec2 v)
Definition: CudaLanczos.h:94
Declaration of a GPU kernel for image warping and declarations of required datatypes.
SVec2 VecMul(SVec2 v, double m)
Definition: CudaLanczos.h:90
additional GPU exceptions
int getHeight() const
Return the number of rows in the image.
Definition: MaskedImage.h:903
#define INSTANTIATE(MATCH)
definition of the Trace messaging facilities
A kernel described by a pair of functions: func(x, y) = colFunc(x) * rowFunc(y)
Definition: Kernel.h:986
bool isInsideBox(gpu::SPoint2 p)
Definition: CudaLanczos.h:113
bool TryToSelectCudaDevice(bool noExceptions, bool reselect=false)
ImagePtr getImage(bool const noThrow=false) const
Return a (Ptr to) the MaskedImage's image.
Definition: MaskedImage.h:869
void WarpImageGpuCallKernel(bool isMaskedImage, ImageDataPtr< DestPixelT > destImageGpu, ImageDataPtr< SrcPixelT > srcImageGpu, int mainKernelSize, KernelType maskKernelType, int maskKernelSize, SBox2I srcGoodBox, PixelIVM< DestPixelT > edgePixel, BilinearInterp *srcPosInterp, int interpLength)
Calls the GPU kernel for lanczos resampling.
defines a 2D range of integer values begX &lt;= x &lt; endX, begY &lt;= y &lt; endY
Definition: CudaLanczos.h:101
contains GpuBuffer2D class (for simple handling of images or 2D arrays)
An integer coordinate rectangle.
Definition: Box.h:53
VariancePtr getVariance(bool const noThrow=false) const
Return a (Ptr to) the MaskedImage's variance.
Definition: MaskedImage.h:890
table::Key< table::Array< Kernel::Pixel > > image
Definition: FixedKernel.cc:117
int getOrder() const
get the order of the kernel
Base class to transform pixel position for a destination image to its position in the original source...
void ImageT ImageT int float saturatedPixelValue int const width
Definition: saturated.cc:44
Simple 2D point (suitable for use on a GPU)
Definition: CudaLanczos.h:56
int getMinY() const
Definition: Box.h:125
MaskPtr getMask(bool const noThrow=false) const
Return a (Ptr to) the MaskedImage's mask.
Definition: MaskedImage.h:879
A class to manipulate images, masks, and variance as a single object.
Definition: MaskedImage.h:77
bool isGpuBuild()
Inline function which returns true only when GPU_BUILD macro is defined.
Definition: IsGpuBuild.h:45
int getMinX() const
Definition: Box.h:124
int getHeight() const
Return the number of rows in the image.
Definition: Image.h:239
std::pair< int, WarpImageGpuStatus::ReturnCode > warpImageGPU(DestImageT &destImage, SrcImageT const &srcImage, lsst::afw::math::LanczosWarpingKernel const &warpingKernel, lsst::afw::math::SeparableKernel const &maskWarpingKernel, PositionFunctor const &computeSrcPos, int const interpLength, typename DestImageT::SinglePixel padValue, const bool forceProcessing=true)
GPU accelerated image warping using Lanczos resampling.
ImageT::SinglePixel edgePixel(lsst::afw::image::detail::Image_tag)
Return an off-the-edge pixel appropriate for a given Image type.
double x
void ImageT ImageT int float saturatedPixelValue int const height
Definition: saturated.cc:44
Simple 2D vector (suitable for use on a GPU)
Definition: CudaLanczos.h:69
Functions to help managing setup for GPU kernels.
int row
Definition: CR.cc:153
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
Support for warping an image to a new WCS.
PixelT & Pixel(int x, int y)
Definition: GpuBuffer2D.h:133
int getWidth() const
Return the number of columns in the image.
Definition: MaskedImage.h:901
Lanczos warping: accurate but slow and can introduce ringing artifacts.
Definition: warpExposure.h:72
Implementation of the Class MaskedImage.
int getMaxX() const
Definition: Box.h:128
SVec2 VecSub(SVec2 a, SVec2 b)
Definition: CudaLanczos.h:86
int getWidth() const
Return the number of columns in the image.
Definition: Image.h:237
Functions and a class to help allocating GPU global memory and transferring data to and from a GPU...
A class to represent a 2-dimensional array of pixels.
Definition: Image.h:415
int col
Definition: CR.cc:152
lsst::afw::geom::Box2I shrinkBBox(lsst::afw::geom::Box2I const &bbox) const
Definition: Kernel.cc:207
A function to determine whether compiling for GPU is enabled.