40 using namespace lsst::afw::gpu;
42 #ifndef GPU_BUILD //if no GPU support, throw exceptions
51 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
55 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
59 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
64 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with gpu support");
68 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with gpu support");
72 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with gpu support");
76 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with gpu support");
80 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with gpu support");
91 #include <cuda_runtime.h>
103 void PrintCudaErrorInfo(cudaError_t cudaError,
const char* errorStr);
107 const char *devStr = getenv(
"CUDA_DEVICE");
108 if (devStr == NULL)
return -2;
109 else return atoi(devStr);
119 cudaError_t err = cudaSetDevice(devId);
120 if (err != cudaSuccess) {
123 sprintf(errorStr,
"Error selecting device %d:\n %s\n", devId, cudaGetErrorString(err));
129 if (devId != -2)
return true;
134 cudaDeviceProp GetDesiredDeviceProperties()
137 memset(&prop, 1,
sizeof(prop));
143 prop.maxGridSize[0] = 128;
144 prop.maxThreadsDim[0] = 256;
146 prop.multiProcessorCount = 2;
147 prop.clockRate = 700.0 * 1000 ;
149 prop.sharedMemPerBlock = 32 * (1 << 10);
150 prop.regsPerBlock = 256 * 60 ;
151 prop.maxThreadsPerBlock = 256;
152 prop.totalGlobalMem = 500 * 1024 * 1024;
159 int cudaDevicesN = 0;
160 cudaGetDeviceCount(&cudaDevicesN);
161 if (cudaDevicesN == 0) {
162 throw LSST_EXCEPT(GpuRuntimeError,
"No CUDA capable GPUs found");
165 cudaDeviceProp prop = GetDesiredDeviceProperties();
169 cudaError_t cudaError = cudaChooseDevice(&devId, &prop);
171 if (cudaError != cudaSuccess) {
172 throw LSST_EXCEPT(GpuRuntimeError,
"Error choosing device automatically");
174 cudaError = cudaSetDevice(devId);
175 if (cudaError == cudaErrorSetOnActiveProcess) {
177 cudaGetDevice(&devId);
178 }
else if (cudaError != cudaSuccess) {
180 sprintf(errorStr,
"Error automatically selecting device %d:\n %s\n",
181 devId, cudaGetErrorString(cudaError));
188 cudaDeviceProp prop = GetDesiredDeviceProperties();
192 cudaError_t cudaError = cudaGetDevice(&devId);
193 if (cudaError != cudaSuccess) {
194 throw LSST_EXCEPT(GpuRuntimeError,
"Could not get selected CUDA device ID");
196 cudaDeviceProp deviceProp;
197 cudaError = cudaGetDeviceProperties(&deviceProp, devId);
198 if (cudaError != cudaSuccess) {
199 throw LSST_EXCEPT(GpuRuntimeError,
"Could not get CUDA device properties");
201 if (deviceProp.major < prop.major ||
202 (deviceProp.major == prop.major && deviceProp.minor < prop.minor)
204 sprintf(errorStr,
"Only SM %d.%d or better GPU devices are currently allowed", prop.major, prop.minor);
208 if (deviceProp.major == prop.major && deviceProp.minor < prop.minor) {
209 if (deviceProp.totalGlobalMem < prop.totalGlobalMem) {
210 throw LSST_EXCEPT(GpuRuntimeError,
"Not enough global memory on GPU");
213 if (deviceProp.sharedMemPerBlock < 16 * 1000) {
214 throw LSST_EXCEPT(GpuRuntimeError,
"Not enough shared memory on GPU");
216 if (deviceProp.regsPerBlock < prop.regsPerBlock) {
217 throw LSST_EXCEPT(GpuRuntimeError,
"Not enough registers per block available on GPU");
219 if (deviceProp.maxThreadsPerBlock < prop.maxThreadsPerBlock) {
220 throw LSST_EXCEPT(GpuRuntimeError,
"Not enough threads per block available on GPU");
226 #if !defined(GPU_BUILD)
229 static bool isDeviceSelected =
false;
230 static bool isDeviceOk =
false;
233 isDeviceSelected =
false;
237 if (isDeviceSelected)
239 isDeviceSelected =
true;
282 cudaError_t cudaError = cudaSetDevice(devId);
283 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"SetCudaDevice> unsucessfull");
289 cudaError_t cudaError = cudaMalloc((
void**)&dataGpu, 256 *
sizeof(
int));
290 if (cudaError != cudaSuccess) {
291 PrintCudaErrorInfo(cudaError,
"CudaReserveDevice> Could not reserve device by calling cudaMalloc");
293 cudaError = cudaFree(dataGpu);
294 if (cudaError != cudaSuccess) {
295 PrintCudaErrorInfo(cudaError,
"CudaReserveDevice> Could not release memory by calling cudaFree");
additional GPU exceptions
bool TryToSelectCudaDevice(bool noExceptions, bool reselect=false)
int GetPrefferedCudaDevice()
int GetPreferredCudaDevice()
void SetCudaDevice(int devId)
selects a cuda device
void AutoSelectCudaDevice()
bool SelectPreferredCudaDevice()
void CudaThreadExit()
frees resources and releases current cuda device
Functions to help managing setup for GPU kernels.
#define LSST_EXCEPT(type,...)
void CudaReserveDevice()
reserves cuda device
Functions to query the properties of currently selected GPU device.
Include files required for standard LSST Exception handling.