BasicConvolveGPU.cc
1 // -*- LSST-C++ -*-
2 
3 /*
4  * LSST Data Management System
5  * Copyright 2008, 2009, 2010 LSST Corporation.
6  *
7  * This product includes software developed by the
8  * LSST Project (http://www.lsst.org/).
9  *
10  * This program is free software: you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation, either version 3 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the LSST License Statement and
21  * the GNU General Public License along with this program. If not,
22  * see <http://www.lsstcorp.org/LegalNotices/>.
23  */
24 
37 #include <algorithm>
38 #include <cmath>
39 #include <sstream>
40 #include <vector>
41 
42 #include "boost/cstdint.hpp"
43 
44 #include "lsst/pex/exceptions.h"
45 #include "lsst/pex/logging/Trace.h"
48 #include "lsst/afw/math/Kernel.h"
50 #include "lsst/afw/geom.h"
53 
61 
62 namespace pexExcept = lsst::pex::exceptions;
63 namespace pexLog = lsst::pex::logging;
64 namespace afwGeom = lsst::afw::geom;
65 namespace afwImage = lsst::afw::image;
66 namespace afwMath = lsst::afw::math;
67 namespace afwGpu = lsst::afw::gpu;
68 namespace mathDetail = lsst::afw::math::detail;
69 namespace gpuDetail = lsst::afw::gpu::detail;
70 
71 namespace {
72 
76 
77 // copies data from MaskedImage to three image buffers
78 template <typename PixelT>
79 void CopyFromMaskedImage(afwImage::MaskedImage<PixelT, MskPixel, VarPixel> const& image,
80  gpuDetail::GpuBuffer2D<PixelT>& img,
81  gpuDetail::GpuBuffer2D<VarPixel>& var,
82  gpuDetail::GpuBuffer2D<MskPixel>& msk
83  )
84 {
85  int width = image.getWidth();
86  int height = image.getHeight();
87  img.Init(width, height);
88  var.Init(width, height);
89  msk.Init(width, height);
90 
91  typedef typename afwImage::MaskedImage<PixelT, MskPixel, VarPixel>
92  ::x_iterator x_iterator;
93 
94  //copy input image data to buffer
95  for (int i = 0; i < height; ++i) {
96  x_iterator inPtr = image.x_at(0, i);
97  PixelT* imageDataPtr = img.GetImgLinePtr(i);
98  MskPixel* imageMaskPtr = msk.GetImgLinePtr(i);
99  VarPixel* imageVarPtr = var.GetImgLinePtr(i);
100 
101  for (x_iterator cnvEnd = inPtr + width; inPtr != cnvEnd;
102  ++inPtr, ++imageDataPtr, ++imageMaskPtr, ++imageVarPtr ) {
103  *imageDataPtr = (*inPtr).image();
104  *imageMaskPtr = (*inPtr).mask();
105  *imageVarPtr = (*inPtr).variance();
106  }
107  }
108 }
109 
110 // copies data from three image buffers to MaskedImage
111 template <typename PixelT>
112 void CopyToImage(afwImage::MaskedImage<PixelT, MskPixel, VarPixel>& outImage,
113  int startX, int startY,
114  gpuDetail::GpuBuffer2D<PixelT> const& img,
115  gpuDetail::GpuBuffer2D<VarPixel> const& var,
116  gpuDetail::GpuBuffer2D<MskPixel> const& msk
117  )
118 {
119  assert(img.height == var.height);
120  assert(img.height == msk.height);
121  assert(img.width == var.width);
122  assert(img.width == msk.width);
123 
124  typedef typename afwImage::MaskedImage<PixelT, MskPixel, VarPixel>
125  ::x_iterator x_iterator;
126 
127  for (int i = 0; i < img.height; ++i) {
128  const PixelT* outPtrImg = img.GetImgLinePtr(i);
129  const MskPixel* outPtrMsk = msk.GetImgLinePtr(i);
130  const VarPixel* outPtrVar = var.GetImgLinePtr(i);
131 
132  for (x_iterator cnvPtr = outImage.x_at(startX, i + startY),
133  cnvEnd = cnvPtr + img.width; cnvPtr != cnvEnd; ++cnvPtr )
134  {
135  *cnvPtr = typename x_iterator::type(*outPtrImg, *outPtrMsk, *outPtrVar);
136  ++outPtrImg;
137  ++outPtrMsk;
138  ++outPtrVar;
139  }
140  }
141 }
142 
143 } // anonymous namespace
144 
169 template <typename OutImageT, typename InImageT>
170 mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::basicConvolveGPU(
171  OutImageT &convolvedImage,
172  InImageT const& inImage,
173  afwMath::Kernel const& kernel,
174  afwMath::ConvolutionControl const& convolutionControl)
175 {
176  if (!afwGpu::isGpuBuild()) {
177  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
178  }
179 
180  // Because convolve isn't a method of Kernel we can't always use Kernel's vtbl to dynamically
181  // dispatch the correct version of basicConvolve. The case that fails is convolving with a kernel
182  // obtained from a pointer or reference to a Kernel (base class), e.g. as used in LinearCombinationKernel.
183  if (IS_INSTANCE(kernel, afwMath::DeltaFunctionKernel)) {
184  return mathDetail::ConvolveGpuStatus::UNSUPPORTED_KERNEL_TYPE;
185  } else if (IS_INSTANCE(kernel, afwMath::SeparableKernel)) {
186  return mathDetail::ConvolveGpuStatus::UNSUPPORTED_KERNEL_TYPE;
187  } else if (IS_INSTANCE(kernel, afwMath::LinearCombinationKernel) && kernel.isSpatiallyVarying()) {
188  pexLog::TTrace<4>("lsst.afw.math.convolve",
189  "generic basicConvolve (GPU): dispatch to convolveLinearCombinationGPU");
190  return mathDetail::convolveLinearCombinationGPU(convolvedImage, inImage,
191  *dynamic_cast<afwMath::LinearCombinationKernel const*>(&kernel),
192  convolutionControl);
193  }
194 
195  // use brute force
196  pexLog::TTrace<3>("lsst.afw.math.convolve",
197  "generic basicConvolve (GPU): dispatch to convolveSpatiallyInvariantGPU");
198  return mathDetail::convolveSpatiallyInvariantGPU(convolvedImage, inImage, kernel, convolutionControl);
199 }
200 
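// Usage sketch for the dispatch entry point above. Illustrative only: `outMI`, `inMI` and
// `kernel` are hypothetical, already-constructed objects, and the ConvolutionControl setters
// are assumed to mirror the getters used in this file.
//
//     afwMath::ConvolutionControl ctrl;
//     ctrl.setDoNormalize(true);
//     ctrl.setDevicePreference(afwGpu::USE_GPU);   // fail loudly instead of silently using the CPU
//     mathDetail::ConvolveGpuStatus::ReturnCode status =
//             mathDetail::basicConvolveGPU(outMI, inMI, kernel, ctrl);
//     // any status other than ConvolveGpuStatus::OK tells the caller to fall back to
//     // the ordinary CPU convolution path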
225 template <typename OutPixelT, typename InPixelT>
226 mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveLinearCombinationGPU(
227  afwImage::MaskedImage<OutPixelT, MskPixel, VarPixel>& convolvedImage,
228  afwImage::MaskedImage<InPixelT, MskPixel, VarPixel> const& inImage,
229  afwMath::LinearCombinationKernel const& kernel,
230  afwMath::ConvolutionControl const & convolutionControl)
231 {
232  if (!afwGpu::isGpuBuild()) {
233  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
234  }
235  typedef typename afwMath::Kernel::Pixel KernelPixel;
236  typedef afwImage::Image<KernelPixel> KernelImage;
237  typedef gpuDetail::GpuBuffer2D<KernelPixel> KernelBuffer;
238 
239  if (!kernel.isSpatiallyVarying()) {
240  // use the standard algorithm for the spatially invariant case
241  pexLog::TTrace<3>("lsst.afw.math.convolve",
242  "convolveLinearCombinationGPU: spatially invariant; will delegate");
243  return mathDetail::convolveSpatiallyInvariantGPU(convolvedImage, inImage, kernel,
244  convolutionControl.getDoNormalize());
245  } else {
246  bool throwExceptionsOn=convolutionControl.getDevicePreference() == afwGpu::USE_GPU;
247  if (afwGpu::detail::TryToSelectCudaDevice(!throwExceptionsOn) == false){
248  return mathDetail::ConvolveGpuStatus::NO_GPU;
249  }
250 
251  // refactor the kernel if this is reasonable and possible;
252  // then use the standard algorithm for the spatially varying case
253  afwMath::Kernel::Ptr refKernelPtr; // possibly refactored version of kernel
254  if (static_cast<int>(kernel.getNKernelParameters()) > kernel.getNSpatialParameters()) {
255  // refactoring will speed convolution, so try it
256  refKernelPtr = kernel.refactor();
257  if (!refKernelPtr) {
258  refKernelPtr = kernel.clone();
259  }
260  } else {
261  // too few basis kernels for refactoring to be worthwhile
262  refKernelPtr = kernel.clone();
263  }
264  assertDimensionsOK(convolvedImage, inImage, kernel);
265 
266  const afwMath::LinearCombinationKernel* newKernel =
267  dynamic_cast<afwMath::LinearCombinationKernel*> (refKernelPtr.get());
268  assert(newKernel!=NULL);
269 
270  const int kernelN = newKernel->getNBasisKernels();
271  const std::vector< afwMath::Kernel::SpatialFunctionPtr > sFn = newKernel->getSpatialFunctionList();
272  if (sFn.size() < 1) {
273  return mathDetail::ConvolveGpuStatus::SFN_COUNT_ERROR;
274  }
275  if (int(sFn.size()) != kernelN) {
276  return mathDetail::ConvolveGpuStatus::SFN_COUNT_ERROR;
277  }
278  bool isAllCheby = true;
279  for (int i = 0; i < kernelN; i++) {
280  if (!IS_INSTANCE( *sFn[i], afwMath::Chebyshev1Function2<double> )) {
281  isAllCheby = false;
282  }
283  }
284  bool isAllPoly = true;
285  for (int i = 0; i < kernelN; i++) {
286  if (!IS_INSTANCE( *sFn[i], afwMath::PolynomialFunction2<double> )) {
287  isAllPoly = false;
288  }
289  }
290 
291  int order = 0;
292 #ifdef GPU_BUILD
293  SpatialFunctionType_t sfType;
294 #endif
295  if (isAllPoly) {
296  order = dynamic_cast<const afwMath::PolynomialFunction2<double>*>( sFn[0].get() ) ->getOrder();
297 #ifdef GPU_BUILD
298  sfType = sftPolynomial;
299 #endif
300  } else if(isAllCheby) {
301  order = dynamic_cast<const afwMath::Chebyshev1Function2<double>*>( sFn[0].get() ) ->getOrder();
302 #ifdef GPU_BUILD
303  sfType = sftChebyshev;
304 #endif
305  } else
306  return mathDetail::ConvolveGpuStatus::SFN_TYPE_ERROR;
307 
308  //get copies of basis kernels
309  const afwMath::KernelList kernelList = newKernel->getKernelList();
310 
311  //if kernel is too small, call CPU convolution
312  const int minKernelSize = 25;
313  if (newKernel->getWidth() * newKernel->getHeight() < minKernelSize &&
314  convolutionControl.getDevicePreference() != lsst::afw::gpu::USE_GPU) {
315  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_SMALL;
316  }
317 
318  //if something is wrong, call CPU convolution
319  const bool shMemOkA = IsSufficientSharedMemoryAvailable_ForImgBlock(
320  newKernel->getWidth(), newKernel->getHeight(), sizeof(double));
321  const bool shMemOkB = IsSufficientSharedMemoryAvailable_ForSfn(order, kernelN);
322  if (!shMemOkA || !shMemOkB) {
323  //cannot fit kernels into shared memory, revert to convolution by CPU
324  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_BIG;
325  }
326 
327  if (kernelN == 0 || kernelN > detail::gpu::maxGpuSfCount) {
328  return mathDetail::ConvolveGpuStatus::KERNEL_COUNT_ERROR;
329  }
330  for (int i = 0; i < kernelN; i++) {
331  if (kernelList[i]->getDimensions() != newKernel->getDimensions()
332  || kernelList[i]->getCtr() != newKernel->getCtr()
333  ) {
334  return mathDetail::ConvolveGpuStatus::INVALID_KERNEL_DATA;
335  }
336  }
337  pexLog::TTrace<3>("lsst.afw.math.convolve",
338  "MaskedImage, convolveLinearCombinationGPU: will use GPU acceleration");
339 
340  std::vector< KernelBuffer > basisKernels(kernelN);
341  for (int i = 0; i < kernelN; i++) {
342  KernelImage kernelImage(kernelList[i]->getDimensions());
343  (void)kernelList[i]->computeImage(kernelImage, false);
344  basisKernels[i].Init(kernelImage);
345  }
346 
347  int const inImageWidth = inImage.getWidth();
348  int const inImageHeight = inImage.getHeight();
349  int const cnvWidth = inImageWidth + 1 - newKernel->getWidth();
350  int const cnvHeight = inImageHeight + 1 - newKernel->getHeight();
351  int const cnvStartX = newKernel->getCtrX();
352  int const cnvStartY = newKernel->getCtrY();
353 
354  std::vector<double> colPos(cnvWidth);
355  std::vector<double> rowPos(cnvHeight);
356 
357  for (int i = 0; i < cnvWidth; i++) {
358  colPos[i] = inImage.indexToPosition(i + cnvStartX, afwImage::X);
359  }
360  for (int i = 0; i < cnvHeight; i++) {
361  rowPos[i] = inImage.indexToPosition(i + cnvStartY, afwImage::Y);
362  }
363  gpuDetail::GpuBuffer2D<InPixelT> inBufImg;
364  gpuDetail::GpuBuffer2D<VarPixel> inBufVar;
365  gpuDetail::GpuBuffer2D<MskPixel> inBufMsk;
366 
367  CopyFromMaskedImage(inImage, inBufImg, inBufVar, inBufMsk);
368 
369  gpuDetail::GpuBuffer2D<OutPixelT> outBufImg(cnvWidth, cnvHeight);
370  gpuDetail::GpuBuffer2D<VarPixel> outBufVar(cnvWidth, cnvHeight);
371  gpuDetail::GpuBuffer2D<MskPixel> outBufMsk(cnvWidth, cnvHeight);
372 
373  pexLog::TTrace<3>("lsst.afw.math.convolve",
374  "MaskedImage, convolveLinearCombinationGPU: will use GPU acceleration");
375 
376 #ifdef GPU_BUILD
377  GPU_ConvolutionMI_LinearCombinationKernel<OutPixelT, InPixelT>(
378  inBufImg, inBufVar, inBufMsk,
379  colPos, rowPos,
380  sFn,
381  outBufImg, outBufVar, outBufMsk,
382  basisKernels,
383  sfType,
384  convolutionControl.getDoNormalize()
385  );
386 #endif
387 
388  CopyToImage(convolvedImage, cnvStartX, cnvStartY,
389  outBufImg, outBufVar, outBufMsk);
390  }
391  return mathDetail::ConvolveGpuStatus::OK;
392 }
393 
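// Sketch of a spatially varying kernel that passes the checks above (hypothetical basis list
// and order): every spatial function must be a PolynomialFunction2 or a Chebyshev1Function2
// (the polynomial order is read from the first one), and every basis kernel must share the
// dimensions and center of the combined kernel.
//
//     afwMath::KernelList basisList;   // e.g. FixedKernels of identical size, filled elsewhere
//     std::vector<afwMath::Kernel::SpatialFunctionPtr> spatialList;
//     for (size_t i = 0; i < basisList.size(); ++i) {
//         spatialList.push_back(afwMath::Kernel::SpatialFunctionPtr(
//                 new afwMath::PolynomialFunction2<double>(2)));   // 2nd-order in x and y
//     }
//     afwMath::LinearCombinationKernel kernel(basisList, spatialList);
//     // kernel.isSpatiallyVarying() is now true, so the GPU branch above is taken rather
//     // than the spatially invariant delegate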
417 template <typename OutPixelT, typename InPixelT>
418 mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveLinearCombinationGPU(
419  afwImage::Image<OutPixelT>& convolvedImage,
420  afwImage::Image<InPixelT > const& inImage,
421  afwMath::LinearCombinationKernel const& kernel,
422  afwMath::ConvolutionControl const & convolutionControl)
423 {
424  if (!afwGpu::isGpuBuild()) {
425  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
426  }
427  typedef typename afwMath::Kernel::Pixel KernelPixel;
428  typedef afwImage::Image<KernelPixel> KernelImage;
429  typedef gpuDetail::GpuBuffer2D<KernelPixel> KernelBuffer;
430 
431  if (!kernel.isSpatiallyVarying()) {
432  // use the standard algorithm for the spatially invariant case
433  pexLog::TTrace<3>("lsst.afw.math.convolve",
434  "convolveLinearCombinationGPU: spatially invariant; delegate");
435  return mathDetail::convolveSpatiallyInvariantGPU(convolvedImage, inImage, kernel,
436  convolutionControl.getDoNormalize());
437  } else {
438  bool throwExceptionsOn=convolutionControl.getDevicePreference() == afwGpu::USE_GPU;
439  if (afwGpu::detail::TryToSelectCudaDevice(!throwExceptionsOn) == false){
440  return mathDetail::ConvolveGpuStatus::NO_GPU;
441  }
442 
443  // refactor the kernel if this is reasonable and possible;
444  // then use the standard algorithm for the spatially varying case
445  afwMath::Kernel::Ptr refKernelPtr; // possibly refactored version of kernel
446  if (static_cast<int>(kernel.getNKernelParameters()) > kernel.getNSpatialParameters()) {
447  // refactoring will speed convolution, so try it
448  refKernelPtr = kernel.refactor();
449  if (!refKernelPtr) {
450  refKernelPtr = kernel.clone();
451  }
452  } else {
453  // too few basis kernels for refactoring to be worthwhile
454  refKernelPtr = kernel.clone();
455  }
456 
457  {
458  assertDimensionsOK(convolvedImage, inImage, kernel);
459 
460  const afwMath::LinearCombinationKernel* newKernel =
461  dynamic_cast<afwMath::LinearCombinationKernel*> (refKernelPtr.get());
462  assert(newKernel!=NULL);
463 
464  const int kernelN = newKernel->getNBasisKernels();
465  const std::vector< afwMath::Kernel::SpatialFunctionPtr > sFn = newKernel->getSpatialFunctionList();
466  if (sFn.size() < 1) {
467  return mathDetail::ConvolveGpuStatus::SFN_COUNT_ERROR;
468  }
469  if (int(sFn.size()) != kernelN) {
470  return mathDetail::ConvolveGpuStatus::SFN_COUNT_ERROR;
471  }
472 
473  bool isAllCheby = true;
474  for (int i = 0; i < kernelN; i++) {
475  if (!IS_INSTANCE( *sFn[i], afwMath::Chebyshev1Function2<double> )) {
476  isAllCheby = false;
477  }
478  }
479  bool isAllPoly = true;
480  for (int i = 0; i < kernelN; i++) {
481  if (!IS_INSTANCE( *sFn[i], afwMath::PolynomialFunction2<double> )) {
482  isAllPoly = false;
483  }
484  }
485  if (!isAllPoly && !isAllCheby) {
486  return mathDetail::ConvolveGpuStatus::SFN_TYPE_ERROR;
487  }
488 
489  int order = 0;
490 #ifdef GPU_BUILD
491  SpatialFunctionType_t sfType;
492 #endif
493  if (isAllPoly) {
494  order = dynamic_cast<const afwMath::PolynomialFunction2<double>*>( sFn[0].get() ) ->getOrder();
495 #ifdef GPU_BUILD
496  sfType = sftPolynomial;
497 #endif
498  } else if(isAllCheby) {
499  order = dynamic_cast<const afwMath::Chebyshev1Function2<double>*>( sFn[0].get() ) ->getOrder();
500 #ifdef GPU_BUILD
501  sfType = sftChebyshev;
502 #endif
503  } else {
504  return mathDetail::ConvolveGpuStatus::SFN_TYPE_ERROR;
505  }
506  //get copies of basis kernels
507  const afwMath::KernelList kernelList = newKernel->getKernelList();
508 
509  //if kernel is too small, call CPU convolution
510  const int minKernelSize = 20;
511  if (newKernel->getWidth() * newKernel->getHeight() < minKernelSize &&
512  convolutionControl.getDevicePreference() != lsst::afw::gpu::USE_GPU) {
513  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_SMALL;
514  }
515 
516  //if something is wrong, call CPU convolution
517  const bool shMemOkA = IsSufficientSharedMemoryAvailable_ForImgBlock(
518  newKernel->getWidth(), newKernel->getHeight(), sizeof(double));
519  const bool shMemOkB = IsSufficientSharedMemoryAvailable_ForSfn(order, kernelN);
520  if (!shMemOkA || !shMemOkB) {
521  //cannot fit kernels into shared memory, revert to convolution by CPU
522  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_BIG;
523  }
524 
525  if (kernelN == 0) {
526  return mathDetail::ConvolveGpuStatus::KERNEL_COUNT_ERROR;
527  }
528 
529  for (int i = 0; i < kernelN; i++) {
530  if (kernelList[i]->getDimensions() != newKernel->getDimensions()
531  || kernelList[i]->getCtr() != newKernel->getCtr()
532  ) {
533  return mathDetail::ConvolveGpuStatus::INVALID_KERNEL_DATA;
534  }
535  }
536 
537  std::vector< KernelBuffer > basisKernels(kernelN);
538  for (int i = 0; i < kernelN; i++) {
539  KernelImage kernelImage(kernelList[i]->getDimensions());
540  (void)kernelList[i]->computeImage(kernelImage, false);
541  basisKernels[i].Init(kernelImage);
542  }
543 
544  int const inImageWidth = inImage.getWidth();
545  int const inImageHeight = inImage.getHeight();
546  int const cnvWidth = inImageWidth + 1 - newKernel->getWidth();
547  int const cnvHeight = inImageHeight + 1 - newKernel->getHeight();
548  int const cnvStartX = newKernel->getCtrX();
549  int const cnvStartY = newKernel->getCtrY();
550 
551  std::vector<double> colPos(cnvWidth);
552  std::vector<double> rowPos(cnvHeight);
553 
554  for (int i = 0; i < cnvWidth; i++) {
555  colPos[i] = inImage.indexToPosition(i + cnvStartX, afwImage::X);
556  }
557  for (int i = 0; i < cnvHeight; i++) {
558  rowPos[i] = inImage.indexToPosition(i + cnvStartY, afwImage::Y);
559  }
560  gpuDetail::GpuBuffer2D<InPixelT> inBuf(inImage);
561  gpuDetail::GpuBuffer2D<OutPixelT> outBuf(cnvWidth, cnvHeight);
562 
563  pexLog::TTrace<3>("lsst.afw.math.convolve",
564  "plain Image, convolveLinearCombinationGPU: will use GPU acceleration");
565 
566 #ifdef GPU_BUILD
567  GPU_ConvolutionImage_LinearCombinationKernel<OutPixelT, InPixelT>(
568  inBuf, colPos, rowPos,
569  sFn,
570  outBuf,
571  basisKernels,
572  sfType,
573  convolutionControl.getDoNormalize()
574  );
575 #endif
576 
577  outBuf.CopyToImage(convolvedImage, cnvStartX, cnvStartY);
578  }
579  }
580  return mathDetail::ConvolveGpuStatus::OK;
581 }
582 
613 template <typename OutPixelT, typename InPixelT>
614 mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveSpatiallyInvariantGPU(
615  afwImage::Image<OutPixelT>& convolvedImage,
616  afwImage::Image<InPixelT > const& inImage,
617  afwMath::Kernel const& kernel,
618  afwMath::ConvolutionControl const & convolutionControl)
619 {
620  if (!afwGpu::isGpuBuild()) {
621  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
622  }
623  const bool doNormalize = convolutionControl.getDoNormalize();
624 
625  const bool throwExceptionsOn=convolutionControl.getDevicePreference() == afwGpu::USE_GPU;
626  if (afwGpu::detail::TryToSelectCudaDevice(!throwExceptionsOn) == false){
627  return mathDetail::ConvolveGpuStatus::NO_GPU;
628  }
629 
630  typedef typename afwMath::Kernel::Pixel KernelPixel;
631  typedef afwImage::Image<KernelPixel> KernelImage;
632  typedef typename KernelImage::const_x_iterator KernelXIterator;
633  typedef typename KernelImage::const_xy_locator KernelXYLocator;
634 
635  if (kernel.isSpatiallyVarying()) {
636  return mathDetail::ConvolveGpuStatus::UNSUPPORTED_KERNEL_TYPE;
637  }
638 
639  assertDimensionsOK(convolvedImage, inImage, kernel);
640 
641  const int minKernelSize = 25;
642 
643  int const inImageWidth = inImage.getWidth();
644  int const inImageHeight = inImage.getHeight();
645  int const kWidth = kernel.getWidth();
646  int const kHeight = kernel.getHeight();
647  int const cnvWidth = inImageWidth + 1 - kernel.getWidth();
648  int const cnvHeight = inImageHeight + 1 - kernel.getHeight();
649  int const cnvStartX = kernel.getCtrX();
650  int const cnvStartY = kernel.getCtrY();
651 
652  KernelImage kernelImage(kernel.getDimensions());
653 
654  pexLog::TTrace<3>("lsst.afw.math.convolve",
655  "convolveSpatiallyInvariantGPU: using GPU acceleration, "
656  "plain Image, kernel is spatially invariant");
657  (void)kernel.computeImage(kernelImage, doNormalize);
658 
659  typedef afwImage::Image<InPixelT > InImageT;
660  typedef afwImage::Image<OutPixelT > OutImageT;
661 
662  const bool shMemOk = IsSufficientSharedMemoryAvailable_ForImgBlock(kWidth, kHeight, sizeof(double));
663  if (!shMemOk) {
664  //cannot fit kernels into shared memory, revert to convolution by CPU
665  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_BIG;
666  }
667  //if kernel is too small, call CPU convolution
668  if (kWidth * kHeight < minKernelSize &&
669  convolutionControl.getDevicePreference() != lsst::afw::gpu::USE_GPU) {
670  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_SMALL;
671  }
672 
673  gpuDetail::GpuBuffer2D<InPixelT> inBuf(inImage);
674  gpuDetail::GpuBuffer2D<OutPixelT> outBuf(cnvWidth, cnvHeight);
675  gpuDetail::GpuBuffer2D<KernelPixel> kernelBuf(kernelImage);
676 
677 #ifdef GPU_BUILD
678  GPU_ConvolutionImage_SpatiallyInvariantKernel<OutPixelT, InPixelT>(inBuf, outBuf, kernelBuf);
679 #endif
680  outBuf.CopyToImage(convolvedImage, cnvStartX, cnvStartY);
681  return mathDetail::ConvolveGpuStatus::OK;
682 }
683 
714 template <typename OutPixelT, typename InPixelT>
715 mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveSpatiallyInvariantGPU(
716  afwImage::MaskedImage<OutPixelT, MskPixel, VarPixel>& convolvedImage,
717  afwImage::MaskedImage<InPixelT, MskPixel, VarPixel> const& inImage,
718  afwMath::Kernel const& kernel,
719  afwMath::ConvolutionControl const & convolutionControl)
720 {
721  if (!afwGpu::isGpuBuild()) {
722  throw LSST_EXCEPT(afwGpu::GpuRuntimeError, "Afw not compiled with GPU support");
723  }
724  bool doNormalize = convolutionControl.getDoNormalize();
725 
726  typedef afwImage::MaskedImage<InPixelT > InImageT;
727  typedef afwImage::MaskedImage<OutPixelT > OutImageT;
728  typedef typename afwMath::Kernel::Pixel KernelPixel;
729  typedef afwImage::Image<KernelPixel> KernelImage;
730  typedef typename KernelImage::const_x_iterator KernelXIterator;
731  typedef typename KernelImage::const_xy_locator KernelXYLocator;
732 
733  if (kernel.isSpatiallyVarying()) {
734  return mathDetail::ConvolveGpuStatus::UNSUPPORTED_KERNEL_TYPE;
735  }
736 
737  assertDimensionsOK(convolvedImage, inImage, kernel);
738 
739  const int minKernelSize = 20;
740 
741  int const inImageWidth = inImage.getWidth();
742  int const inImageHeight = inImage.getHeight();
743  int const kWidth = kernel.getWidth();
744  int const kHeight = kernel.getHeight();
745  int const cnvWidth = inImageWidth + 1 - kernel.getWidth();
746  int const cnvHeight = inImageHeight + 1 - kernel.getHeight();
747  int const cnvStartX = kernel.getCtrX();
748  int const cnvStartY = kernel.getCtrY();
749 
750  const bool throwExceptionsOn=convolutionControl.getDevicePreference() == afwGpu::USE_GPU;
751  if (afwGpu::detail::TryToSelectCudaDevice(!throwExceptionsOn) == false){
752  return mathDetail::ConvolveGpuStatus::NO_GPU;
753  }
754 
755  const bool shMemOk = IsSufficientSharedMemoryAvailable_ForImgAndMaskBlock(kWidth, kHeight, sizeof(double));
756  if (!shMemOk) {
757  //cannot fit kernels into shared memory, revert to convolution by CPU
758  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_BIG;
759  }
760 
761  //if kernel is too small, call CPU convolution
762  if (kWidth * kHeight < minKernelSize
763  && convolutionControl.getDevicePreference() != lsst::afw::gpu::USE_GPU) {
764  return mathDetail::ConvolveGpuStatus::KERNEL_TOO_SMALL;
765  }
766 
767  KernelImage kernelImage(kernel.getDimensions());
768 
769  pexLog::TTrace<3>("lsst.afw.math.convolve",
770  "convolveSpatiallyInvariantGPU: using GPU acceleration, "
771  "MaskedImage, kernel is spatially invariant");
772  (void)kernel.computeImage(kernelImage, doNormalize);
773 
774  gpuDetail::GpuBuffer2D<InPixelT> inBufImg;
775  gpuDetail::GpuBuffer2D<VarPixel> inBufVar;
776  gpuDetail::GpuBuffer2D<MskPixel> inBufMsk;
777  CopyFromMaskedImage(inImage, inBufImg, inBufVar, inBufMsk);
778 
779  gpuDetail::GpuBuffer2D<OutPixelT> outBufImg(cnvWidth, cnvHeight);
780  gpuDetail::GpuBuffer2D<VarPixel> outBufVar(cnvWidth, cnvHeight);
781  gpuDetail::GpuBuffer2D<MskPixel> outBufMsk(cnvWidth, cnvHeight);
782 
783  gpuDetail::GpuBuffer2D<KernelPixel> kernelBuf(kernelImage);
784 #ifdef GPU_BUILD
785  GPU_ConvolutionMI_SpatiallyInvariantKernel<OutPixelT, InPixelT>(
786  inBufImg, inBufVar, inBufMsk,
787  outBufImg, outBufVar, outBufMsk,
788  kernelBuf
789  );
790 #endif
791  CopyToImage(convolvedImage, cnvStartX, cnvStartY,
792  outBufImg, outBufVar, outBufMsk);
793  return mathDetail::ConvolveGpuStatus::OK;
794 }
795 
796 /*
797  * Explicit instantiation
798  */
800 #define IMAGE(PIXTYPE) afwImage::Image<PIXTYPE>
801 #define MASKEDIMAGE(PIXTYPE) afwImage::MaskedImage<PIXTYPE, afwImage::MaskPixel, afwImage::VariancePixel>
802 #define NL /* */
803 // Instantiate Image or MaskedImage versions
804 #define INSTANTIATE_IM_OR_MI(IMGMACRO, OUTPIXTYPE, INPIXTYPE) \
805  template mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::basicConvolveGPU( \
806  IMGMACRO(OUTPIXTYPE)&, IMGMACRO(INPIXTYPE) const&, afwMath::Kernel const&, \
807  afwMath::ConvolutionControl const&); NL \
808  template mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveLinearCombinationGPU( \
809  IMGMACRO(OUTPIXTYPE)&, IMGMACRO(INPIXTYPE) const&, afwMath::LinearCombinationKernel const&, \
810  afwMath::ConvolutionControl const&); NL \
811  template mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::convolveSpatiallyInvariantGPU( \
812  IMGMACRO(OUTPIXTYPE)&, IMGMACRO(INPIXTYPE) const&, afwMath::Kernel const&, \
813  afwMath::ConvolutionControl const&);
814 // Instantiate both Image and MaskedImage versions
815 #define INSTANTIATE(OUTPIXTYPE, INPIXTYPE) \
816  INSTANTIATE_IM_OR_MI(IMAGE, OUTPIXTYPE, INPIXTYPE) \
817  INSTANTIATE_IM_OR_MI(MASKEDIMAGE, OUTPIXTYPE, INPIXTYPE)
818 
819 INSTANTIATE(double, double)
820 INSTANTIATE(double, float)
821 INSTANTIATE(double, int)
822 INSTANTIATE(double, boost::uint16_t)
823 INSTANTIATE(float, float)
824 INSTANTIATE(float, int)
825 INSTANTIATE(float, boost::uint16_t)
826 INSTANTIATE(int, int)
827 INSTANTIATE(boost::uint16_t, boost::uint16_t)
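// For reference, a single line such as INSTANTIATE(float, int) above expands through
// INSTANTIATE_IM_OR_MI into explicit instantiations of all three GPU entry points for both
// Image and MaskedImage pixel types, e.g.:
//
//     template mathDetail::ConvolveGpuStatus::ReturnCode mathDetail::basicConvolveGPU(
//             afwImage::Image<float>&, afwImage::Image<int> const&,
//             afwMath::Kernel const&, afwMath::ConvolutionControl const&);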
829 
830 