cudaLanczosWrapper.cc
// -*- LSST-C++ -*- // fixed format comment for emacs

/*
 * LSST Data Management System
 * Copyright 2008 - 2012 LSST Corporation.
 *
 * This product includes software developed by the
 * LSST Project (http://www.lsst.org/).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the LSST License Statement and
 * the GNU General Public License along with this program.  If not,
 * see <http://www.lsstcorp.org/LegalNotices/>.
 */

#ifdef GPU_BUILD
#include <cuda.h>
#include <cuda_runtime.h>
#endif

#include <algorithm>   // std::min
#include <cstdint>

#include "lsst/afw/geom/Box.h"
#include "lsst/afw/math/Kernel.h"
#include "lsst/log/Log.h"
#include "lsst/afw/image/Wcs.h"
// ... (further afw headers are included here in the full source: the Lanczos GPU kernel
// declarations, GpuBuffer2D and GPU memory helpers, the GPU exceptions, IsGpuBuild,
// MaskedImage and warpExposure)

using namespace std;
// needed for the unqualified SPoint2, SVec2, SBox2I, VecMul, VecSub and MovePoint used below
using namespace lsst::afw::math::detail::gpu;

namespace mathDetail = lsst::afw::math::detail;
namespace gpuDetail = lsst::afw::gpu::detail;
namespace afwMath = lsst::afw::math;
namespace afwGpu = lsst::afw::gpu;
namespace afwImage = lsst::afw::image;
namespace pexExcept = lsst::pex::exceptions;
namespace afwGeom = lsst::afw::geom;

namespace lsst {
namespace afw {
namespace math {
namespace detail {

namespace {

int CeilDivide(int num, int divisor)
{
    return (num + divisor - 1) / divisor;
}

// get the number of interpolation blocks given an image dimension
int InterpBlkN(int size, int interpLength)
{
    return CeilDivide(size, interpLength) + 1;
}
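
// Worked example (illustrative, not from the original source): for a 100 pixel wide image
// and interpLength = 20, CeilDivide(100, 20) = 5 interpolation blocks per row, so
// InterpBlkN returns 6 -- one mesh point at each block boundary plus one closing point.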

// calculate the interpolated value given the data for linear interpolation
gpu::SPoint2 GetInterpolatedValue(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
                                  int blkX, int blkY, int subX, int subY
                                 )
{
    gpu::BilinearInterp interp = interpBuf.Pixel(blkX, blkY);
    return interp.Interpolate(subX, subY);
}

// same, but addressed by absolute pixel position (x, y) rather than block and sub-block indices
gpu::SPoint2 GetInterpolatedValue(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
                                  int interpLen, int x, int y
                                 )
{
    int blkX = x / interpLen;
    int blkY = y / interpLen;

    int subX = x % interpLen;
    int subY = y % interpLen;

    return GetInterpolatedValue(interpBuf, blkX, blkY, subX, subY);
}
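
// Illustration (not from the original source): with interpLen = 20, destination pixel
// (x, y) = (47, 5) lies in interpolation block (blkX, blkY) = (2, 0) with in-block
// offsets (subX, subY) = (7, 5), and its source position is read off the bilinear
// surface stored for that block.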

// calculate the number of points falling within the srcGoodBox,
// given a bilinearly interpolated coordinate transform function on the integer range
// [0, width) x [0, height)
int NumGoodPixels(afwGpu::detail::GpuBuffer2D<gpu::BilinearInterp> const & interpBuf,
                  int const interpLen, int const width, int const height, SBox2I srcGoodBox)
{
    int cnt = 0;

    int subY = 1, blkY = 0;
    for (int row = 0; row < height; row++, subY++) {
        if (subY >= interpLen) {
            subY -= interpLen;
            blkY++;
        }

        int subX = 1, blkX = 0;
        gpu::BilinearInterp interp = interpBuf.Pixel(blkX, blkY);
        gpu::LinearInterp lineY = interp.GetLinearInterp(subY);

        for (int col = 0; col < width; col++, subX++) {
            if (subX >= interpLen) {
                subX -= interpLen;
                blkX++;
                interp = interpBuf.Pixel(blkX, blkY);
                lineY = interp.GetLinearInterp(subY);
            }
            gpu::SPoint2 srcPos = lineY.Interpolate(subX);
            if (srcGoodBox.isInsideBox(srcPos)) {
                cnt++;
            }
        }
    }
    return cnt;
}
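
// NumGoodPixels walks the same bilinear interpolation data that is sent to the GPU, so the
// number of destination pixels whose source position falls inside srcGoodBox can be counted
// on the CPU; below it is called between the kernel launch and cudaThreadSynchronize, so the
// count can overlap with the GPU work.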

#ifdef GPU_BUILD
// for (plain) Image:
// allocate CPU and GPU buffers, transfer data and call the GPU kernel proxy
// precondition: order*2 < gpu::SIZE_MAX_WARPING_KERNEL
template< typename DestPixelT, typename SrcPixelT>
int WarpImageGpuWrapper(
    afwImage::Image<DestPixelT> &destImage,
    afwImage::Image<SrcPixelT> const &srcImage,
    int mainKernelSize,
    gpu::KernelType maskKernelType,
    int maskKernelSize,
    const lsst::afw::geom::Box2I srcBox,
    gpuDetail::GpuBuffer2D<gpu::BilinearInterp> const &srcPosInterp,
    int const interpLength,
    typename afwImage::Image<DestPixelT>::SinglePixel padValue
)
{
    typedef typename afwImage::Image<DestPixelT> DestImageT;

    typename DestImageT::SinglePixel const edgePixel = padValue;

    gpu::PixelIVM<DestPixelT> edgePixelGpu;
    edgePixelGpu.img = edgePixel;
    edgePixelGpu.var = -1;
    edgePixelGpu.msk = -1;

    int const destWidth = destImage.getWidth();
    int const destHeight = destImage.getHeight();
    gpuDetail::GpuMemOwner<DestPixelT> destBufImgGpu;
    gpuDetail::GpuMemOwner<SrcPixelT> srcBufImgGpu;
    gpuDetail::GpuMemOwner<gpu::BilinearInterp> srcPosInterpGpu;

    gpu::ImageDataPtr<DestPixelT> destImgGpu;
    destImgGpu.strideImg = destBufImgGpu.AllocImageBaseBuffer(destImage);
    if (destBufImgGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output image");
    }
    destImgGpu.img = destBufImgGpu.ptr;
    destImgGpu.var = NULL;
    destImgGpu.msk = NULL;
    destImgGpu.width = destWidth;
    destImgGpu.height = destHeight;

    gpu::ImageDataPtr<SrcPixelT> srcImgGpu;
    srcImgGpu.strideImg = srcBufImgGpu.TransferFromImageBase(srcImage);
    if (srcBufImgGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input image");
    }
    srcImgGpu.img = srcBufImgGpu.ptr;
    srcImgGpu.var = NULL;
    srcImgGpu.msk = NULL;
    srcImgGpu.width = srcImage.getWidth();
    srcImgGpu.height = srcImage.getHeight();

    srcPosInterpGpu.Transfer(srcPosInterp);
    if (srcPosInterpGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError,
                          "Not enough memory on GPU for interpolation data for coordinate transformation");
    }

    SBox2I srcBoxConv(srcBox.getMinX(), srcBox.getMinY(), srcBox.getMaxX() + 1, srcBox.getMaxY() + 1);

    gpu::WarpImageGpuCallKernel(false,
                                destImgGpu, srcImgGpu,
                                mainKernelSize,
                                maskKernelType,
                                maskKernelSize,
                                srcBoxConv,
                                edgePixelGpu,
                                srcPosInterpGpu.ptr, interpLength
                               );

    int numGoodPixels = NumGoodPixels(srcPosInterp, interpLength, destWidth, destHeight, srcBoxConv);

    cudaThreadSynchronize();
    if (cudaGetLastError() != cudaSuccess) {
        throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "GPU calculation failed to run");
    }

    destBufImgGpu.CopyToImageBase(destImage);
    return numGoodPixels;
}

// for MaskedImage:
// allocate CPU and GPU buffers, transfer data and call the GPU kernel proxy
// precondition: order*2 < gpu::SIZE_MAX_WARPING_KERNEL
template< typename DestPixelT, typename SrcPixelT>
int WarpImageGpuWrapper(
    afwImage::MaskedImage<DestPixelT> &dstImage,
    afwImage::MaskedImage<SrcPixelT> const &srcImage,
    int mainKernelSize,
    gpu::KernelType maskKernelType,
    int maskKernelSize,
    const lsst::afw::geom::Box2I srcBox,
    gpuDetail::GpuBuffer2D<gpu::BilinearInterp> const &srcPosInterp,
    int const interpLength,
    typename afwImage::MaskedImage<DestPixelT>::SinglePixel padValue
)
{
    typedef typename afwImage::MaskedImage<DestPixelT> DestImageT;

    typename DestImageT::SinglePixel const edgePixel = padValue;

    gpu::PixelIVM<DestPixelT> edgePixelGpu;
    edgePixelGpu.img = edgePixel.image();
    edgePixelGpu.var = edgePixel.variance();
    edgePixelGpu.msk = edgePixel.mask();

    int const destWidth = dstImage.getWidth();
    int const destHeight = dstImage.getHeight();

    gpuDetail::GpuMemOwner<DestPixelT> destBufImgGpu;
    gpuDetail::GpuMemOwner<gpu::VarPixel> destBufVarGpu;
    gpuDetail::GpuMemOwner<gpu::MskPixel> destBufMskGpu;

    gpuDetail::GpuMemOwner<SrcPixelT> srcBufImgGpu;
    gpuDetail::GpuMemOwner<gpu::VarPixel> srcBufVarGpu;
    gpuDetail::GpuMemOwner<gpu::MskPixel> srcBufMskGpu;

    gpuDetail::GpuMemOwner<gpu::BilinearInterp> srcPosInterpGpu;

    mathDetail::gpu::ImageDataPtr<DestPixelT> destImgGpu;
    destImgGpu.strideImg = destBufImgGpu.AllocImageBaseBuffer(*dstImage.getImage());
    destImgGpu.strideVar = destBufVarGpu.AllocImageBaseBuffer(*dstImage.getVariance());
    destImgGpu.strideMsk = destBufMskGpu.AllocImageBaseBuffer(*dstImage.getMask());
    if (destBufImgGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output image");
    }
    if (destBufVarGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output variance");
    }
    if (destBufMskGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for output mask");
    }
    destImgGpu.img = destBufImgGpu.ptr;
    destImgGpu.var = destBufVarGpu.ptr;
    destImgGpu.msk = destBufMskGpu.ptr;
    destImgGpu.width = destWidth;
    destImgGpu.height = destHeight;

    gpu::ImageDataPtr<SrcPixelT> srcImgGpu;
    srcImgGpu.strideImg = srcBufImgGpu.TransferFromImageBase(*srcImage.getImage());
    if (srcBufImgGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input image");
    }
    srcImgGpu.strideVar = srcBufVarGpu.TransferFromImageBase(*srcImage.getVariance());
    if (srcBufVarGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input variance");
    }
    srcImgGpu.strideMsk = srcBufMskGpu.TransferFromImageBase(*srcImage.getMask());
    if (srcBufMskGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError, "Not enough memory on GPU for input mask");
    }

    srcImgGpu.img = srcBufImgGpu.ptr;
    srcImgGpu.var = srcBufVarGpu.ptr;
    srcImgGpu.msk = srcBufMskGpu.ptr;
    srcImgGpu.width = srcImage.getWidth();
    srcImgGpu.height = srcImage.getHeight();

    srcPosInterpGpu.Transfer(srcPosInterp);
    if (srcPosInterpGpu.ptr == NULL) {
        throw LSST_EXCEPT(afwGpu::GpuMemoryError,
                          "Not enough memory on GPU for interpolation data for coordinate transformation");
    }

    SBox2I srcBoxConv(srcBox.getMinX(), srcBox.getMinY(), srcBox.getMaxX() + 1, srcBox.getMaxY() + 1);

    gpu::WarpImageGpuCallKernel(true,
                                destImgGpu, srcImgGpu,
                                mainKernelSize,
                                maskKernelType,
                                maskKernelSize,
                                srcBoxConv,
                                edgePixelGpu,
                                srcPosInterpGpu.ptr, interpLength
                               );
    int numGoodPixels = NumGoodPixels(srcPosInterp, interpLength, destWidth, destHeight, srcBoxConv);

    cudaThreadSynchronize();
    if (cudaGetLastError() != cudaSuccess) {
        throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "GPU calculation failed to run");
    }

    destBufImgGpu.CopyToImageBase(*dstImage.getImage());
    destBufVarGpu.CopyToImageBase(*dstImage.getVariance());
    destBufMskGpu.CopyToImageBase(*dstImage.getMask());

    return numGoodPixels;
}
#endif //GPU_BUILD

// Calculate bilinear interpolation data based on given function values
// input:
//     srcPosInterp - contains values of the original function at a mesh of equally spaced points;
//                    the values are stored in the .o member
//     interpLength - distance between mesh points
//     destWidth, destHeight - size of the function domain
// output:
//     srcPosInterp - all members are calculated and set, ready for calculating interpolated values
void CalculateInterpolationData(gpuDetail::GpuBuffer2D<gpu::BilinearInterp>& srcPosInterp, int interpLength,
                                int destWidth, int destHeight)
{
    int const interpBlkNX = InterpBlkN(destWidth , interpLength);
    int const interpBlkNY = InterpBlkN(destHeight, interpLength);

    for (int row = -1, rowBand = 0; rowBand < interpBlkNY - 1; row += interpLength, rowBand++) {
        double const invInterpLen = 1.0 / interpLength;
        double const invInterpLenRow = row + interpLength <= destHeight - 1 ?
                                       invInterpLen : 1.0 / (destHeight - 1 - row);

        for (int col = -1, colBand = 0; colBand < interpBlkNX - 1; col += interpLength, colBand++) {

            const SPoint2 p11 = srcPosInterp.Pixel(colBand    , rowBand    ).o;
            const SPoint2 p12 = srcPosInterp.Pixel(colBand + 1, rowBand    ).o;
            const SPoint2 p21 = srcPosInterp.Pixel(colBand    , rowBand + 1).o;
            const SPoint2 p22 = srcPosInterp.Pixel(colBand + 1, rowBand + 1).o;
            const SVec2 band_dY  = SVec2(p11, p21);
            const SVec2 band_d0X = SVec2(p11, p12);
            const SVec2 band_d1X = SVec2(p21, p22);
            const SVec2 band_ddX = VecMul( VecSub(band_d1X, band_d0X), invInterpLenRow);

            double const invInterpLenCol = col + interpLength <= destWidth - 1 ?
                                           invInterpLen : 1.0 / (destWidth - 1 - col);

            gpu::BilinearInterp lin = srcPosInterp.Pixel(colBand, rowBand); // sets lin.o
            lin.deltaY = VecMul(band_dY , invInterpLenRow);
            lin.d0X    = VecMul(band_d0X, invInterpLenCol);
            lin.ddX    = VecMul(band_ddX, invInterpLenCol);
            srcPosInterp.Pixel(colBand, rowBand) = lin;

            // partially fill the last column and row, too
            if (colBand == interpBlkNX - 2) {
                srcPosInterp.Pixel(interpBlkNX - 1, rowBand).deltaY =
                    VecMul( SVec2(p12, p22), invInterpLenRow);
            }
            if (rowBand == interpBlkNY - 2) {
                srcPosInterp.Pixel(colBand, interpBlkNY - 1).d0X =
                    VecMul( SVec2(p21, p22), invInterpLenCol);
            }
        }
    }
}
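
// How the coefficients above are consumed (a sketch; it assumes the usual semantics of
// BilinearInterp::GetLinearInterp / LinearInterp::Interpolate declared in CudaLanczos.h):
// for a destination pixel with in-block offsets (subX, subY), the interpolated source
// position is
//     srcPos = o + subY * deltaY + subX * (d0X + subY * ddX)
// which is the bilinear surface through the four transform samples at the block corners.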

} // anonymous namespace ends

// a part of the public interface, see the header file for a description
template<typename DestImageT, typename SrcImageT>
std::pair<int, WarpImageGpuStatus::ReturnCode> warpImageGPU(
    DestImageT &destImage,
    SrcImageT const &srcImage,
    afwMath::LanczosWarpingKernel const &lanczosKernel,
    lsst::afw::math::SeparableKernel const &maskWarpingKernel,
    PositionFunctor const &computeSrcPos,
    int const interpLength,
    typename DestImageT::SinglePixel padValue,
    const bool forceProcessing
)
{
    if (interpLength < 1) {
        throw LSST_EXCEPT(pexExcept::InvalidParameterError,
                          "GPU accelerated warping must use interpolation");
    }

    int const srcWidth = srcImage.getWidth();
    int const srcHeight = srcImage.getHeight();
    LOGL_DEBUG("TRACE2.afw.math.warp", "(GPU) source image width=%d; height=%d", srcWidth, srcHeight);

    if (!afwGpu::isGpuBuild()) {
        throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
    }

#ifdef GPU_BUILD
    gpu::KernelType maskKernelType;
    {
        if (dynamic_cast<afwMath::LanczosWarpingKernel const*>(&maskWarpingKernel)) {
            maskKernelType = gpu::KERNEL_TYPE_LANCZOS;
        } else if (dynamic_cast<afwMath::BilinearWarpingKernel const*>(&maskWarpingKernel)) {
            maskKernelType = gpu::KERNEL_TYPE_BILINEAR;
        } else if (dynamic_cast<afwMath::NearestWarpingKernel const*>(&maskWarpingKernel)) {
            maskKernelType = gpu::KERNEL_TYPE_NEAREST_NEIGHBOR;
        } else {
            throw LSST_EXCEPT(pexExcept::InvalidParameterError, "unknown type of mask warping kernel");
        }
    }
#endif

    if (gpuDetail::TryToSelectCudaDevice(!forceProcessing) == false) {
        return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::NO_GPU);
    }

    int const mainKernelSize = 2 * lanczosKernel.getOrder();
    // do not process if the kernel is too large for the allocated GPU local memory
    if (mainKernelSize * 2 > gpu::SIZE_MAX_WARPING_KERNEL) {
        return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::KERNEL_TOO_LARGE);
    }

    // do not process if the interpolation data would be too large to yield any speed gain
    if (!forceProcessing && interpLength < 3) {
        return std::pair<int, WarpImageGpuStatus::ReturnCode>(-1, WarpImageGpuStatus::INTERP_LEN_TOO_SMALL);
    }

    int const destWidth = destImage.getWidth();
    int const destHeight = destImage.getHeight();
    LOGL_DEBUG("TRACE2.afw.math.warp", "(GPU) remap image width=%d; height=%d", destWidth, destHeight);

    int const maxCol = destWidth - 1;
    int const maxRow = destHeight - 1;

#ifdef GPU_BUILD
    // Compute borders; used to prevent applying the kernel outside of srcImage
    afwGeom::Box2I srcGoodBBox = lanczosKernel.shrinkBBox(srcImage.getBBox(afwImage::LOCAL));
#endif

    int const interpBlkNX = InterpBlkN(destWidth , interpLength);
    int const interpBlkNY = InterpBlkN(destHeight, interpLength);
    // GPU kernel input; will contain, for each interpolation block, all interpolation parameters
    gpuDetail::GpuBuffer2D<gpu::BilinearInterp> srcPosInterp(interpBlkNX, interpBlkNY);

    // calculate values of the coordinate transform function at the mesh points
    for (int rowBand = 0; rowBand < interpBlkNY; rowBand++) {
        int row = min(maxRow, (rowBand * interpLength - 1));
        for (int colBand = 0; colBand < interpBlkNX; colBand++) {
            int col = min(maxCol, (colBand * interpLength - 1));
            afwGeom::Point2D srcPos = computeSrcPos(col, row);
            SPoint2 sSrcPos(srcPos);
            sSrcPos = MovePoint(sSrcPos, SVec2(-srcImage.getX0(), -srcImage.getY0()));
            srcPosInterp.Pixel(colBand, rowBand).o = sSrcPos;
        }
    }

    CalculateInterpolationData(/*in,out*/ srcPosInterp, interpLength, destWidth, destHeight);

    int numGoodPixels = 0;

    LOGL_DEBUG("TRACE2.afw.math.warp", "using GPU acceleration, remapping masked image");

#ifdef GPU_BUILD
    int maskKernelSize;
    if (maskKernelType == gpu::KERNEL_TYPE_LANCZOS) {
        maskKernelSize = 2 * dynamic_cast<afwMath::LanczosWarpingKernel const*>(&maskWarpingKernel)->getOrder();
    } else {
        maskKernelSize = 2;
    }
    numGoodPixels = WarpImageGpuWrapper(destImage,
                                        srcImage,
                                        mainKernelSize,
                                        maskKernelType,
                                        maskKernelSize,
                                        srcGoodBBox,
                                        srcPosInterp, interpLength, padValue
                                       );
#endif
    return std::pair<int, WarpImageGpuStatus::ReturnCode>(numGoodPixels, WarpImageGpuStatus::OK);
}
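
// Usage sketch (illustrative only, not part of the original file; destMI, srcMI,
// computeSrcPos and padValue are assumed to be prepared by the caller):
//
//     afwMath::LanczosWarpingKernel lanczosKernel(3);   // order 3 -> main kernel size 6
//     afwMath::BilinearWarpingKernel maskKernel;        // cheaper kernel for the mask plane
//     std::pair<int, WarpImageGpuStatus::ReturnCode> result =
//         warpImageGPU(destMI, srcMI, lanczosKernel, maskKernel, computeSrcPos,
//                      /*interpLength=*/10, padValue, /*forceProcessing=*/false);
//     // result.second is OK, NO_GPU, KERNEL_TOO_LARGE or INTERP_LEN_TOO_SMALL;
//     // result.first is the number of output pixels whose source position fell inside
//     // the good source region, or -1 when the image was not processed on the GPU.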

//
// Explicit instantiations
//
#define MASKEDIMAGE(PIXTYPE) afwImage::MaskedImage<PIXTYPE, afwImage::MaskPixel, afwImage::VariancePixel>
#define IMAGE(PIXTYPE) afwImage::Image<PIXTYPE>
#define NL /* */

#define INSTANTIATE(DESTIMAGEPIXELT, SRCIMAGEPIXELT) \
    template std::pair<int, WarpImageGpuStatus::ReturnCode> warpImageGPU( \
        IMAGE(DESTIMAGEPIXELT) &destImage, \
        IMAGE(SRCIMAGEPIXELT) const &srcImage, \
        afwMath::LanczosWarpingKernel const &warpingKernel, \
        afwMath::SeparableKernel const &maskWarpingKernel, \
        PositionFunctor const &computeSrcPos, \
        int const interpLength, \
        IMAGE(DESTIMAGEPIXELT)::SinglePixel padValue, \
        const bool forceProcessing); NL \
    template std::pair<int, WarpImageGpuStatus::ReturnCode> warpImageGPU( \
        MASKEDIMAGE(DESTIMAGEPIXELT) &destImage, \
        MASKEDIMAGE(SRCIMAGEPIXELT) const &srcImage, \
        afwMath::LanczosWarpingKernel const &warpingKernel, \
        afwMath::SeparableKernel const &maskWarpingKernel, \
        PositionFunctor const &computeSrcPos, \
        int const interpLength, \
        MASKEDIMAGE(DESTIMAGEPIXELT)::SinglePixel padValue, \
        const bool forceProcessing);

INSTANTIATE(double, double)
INSTANTIATE(double, float)
INSTANTIATE(double, int)
INSTANTIATE(double, std::uint16_t)
INSTANTIATE(float, float)
INSTANTIATE(float, int)
INSTANTIATE(float, std::uint16_t)
INSTANTIATE(int, int)
INSTANTIATE(std::uint16_t, std::uint16_t)

}
}
}
} // namespace lsst::afw::math::detail ends