41 using namespace lsst::afw::gpu;
43 #ifndef GPU_BUILD //if no GPU support, throw exceptions
53 printf(
"Afw not compiled with GPU support\n");
57 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
61 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
65 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
69 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
73 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
77 throw LSST_EXCEPT(GpuRuntimeError,
"AFW not built with GPU support");
88 #include <cuda_runtime.h>
98 void PrintDeviceProperties(
int id, cudaDeviceProp deviceProp)
100 printf(
"Name : %s |", deviceProp.name );
101 printf(
" CUDA Capable SM %d.%d hardware, %d multiproc.\n", deviceProp.major, deviceProp.minor,
102 deviceProp.multiProcessorCount);
103 printf(
" Clock rate: %6.2f GHz \t", deviceProp.clockRate / (1000.0 * 1000));
104 printf(
" Memory on device: %6zu MiB\n", deviceProp.totalGlobalMem / (1 << 20) );
105 printf(
" Multiprocessors: %6d\n", deviceProp.multiProcessorCount);
106 printf(
" Warp size: %6d \t", deviceProp.warpSize );
107 printf(
" Shared memory:%6zu KiB\n", deviceProp.sharedMemPerBlock / (1 << 10) );
108 printf(
" Registers: %6d \t", deviceProp.regsPerBlock );
109 printf(
" Max threads: %6d \n", deviceProp.maxThreadsPerBlock );
111 printf(
" Compute mode (device sharing) : ");
112 if (deviceProp.computeMode == cudaComputeModeDefault) {
113 printf(
"Default - shared between threads\n" );
115 if (deviceProp.computeMode == cudaComputeModeExclusive) {
116 printf(
"Exclusive - only one thread at a time\n" );
118 if (deviceProp.computeMode == cudaComputeModeProhibited) {
119 printf(
"Prohibited - cannot use this device\n" );
122 printf(
" Timeout enabled: %3s ", deviceProp.kernelExecTimeoutEnabled == 1 ?
"Yes" :
"No" );
123 printf(
" Overlapped copying: %3s ", deviceProp.deviceOverlap == 1 ?
"Yes" :
"No" );
124 printf(
" Intergrated on MB: %3s\n", deviceProp.integrated == 1 ?
"Yes" :
"No" );
125 printf(
" Memory pitch: %12zu \t", deviceProp.memPitch );
126 printf(
" Constant memory: %6zu kiB \n", deviceProp.totalConstMem / (1 << 10) );
129 void PrintCudaErrorInfo(cudaError_t cudaError,
const char* errorStr)
131 printf(
"\nSupplied error string: %s\n", errorStr);
132 printf(
"CUDA error : %d\n", cudaError);
133 printf(
"CUDA error string : %s\n", cudaGetErrorString(cudaError));
141 cudaError_t cudaError;
144 cudaError = cudaDriverGetVersion(&driverVersion);
145 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"Could not get CUDA driver version");
146 printf(
"Driver ver.: %d.%d ", driverVersion / 1000, driverVersion % 1000);
150 cudaError = cudaRuntimeGetVersion(&runtimeVersion);
151 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"Could not get CUDA runtime version");
152 printf(
"Runtime ver.: %d.%d ", runtimeVersion / 1000, runtimeVersion % 1000);
157 int cudaDevicesN = 0;
158 cudaError = cudaGetDeviceCount(&cudaDevicesN);
159 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"Could not get CUDA device count");
161 printf(
"Device count: %d ", cudaDevicesN);
163 if(cudaDevicesN < 1) {
164 printf(
"Your system does not have a CUDA capable device\n");
169 cudaError = cudaGetDevice(&curDevId);
170 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"Could not get CUDA device id");
171 printf(
"Info for device %d\n", curDevId);
174 cudaDeviceProp deviceProp;
175 cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
176 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"Could not get CUDA device properties");
177 PrintDeviceProperties(curDevId, deviceProp);
180 for (
int i = 0; i < 79; i++) {
190 cudaError_t cudaError = cudaGetDevice(&curDevId);
191 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"GetCudaDeviceId> Could not get CUDA device id");
198 cudaDeviceProp deviceProp;
199 cudaError_t cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
200 if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError,
"GetCudaSMSharedMemorySize> Could not get CUDA device properties");
202 return deviceProp.sharedMemPerBlock;
208 cudaDeviceProp deviceProp;
209 cudaError_t cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
210 if (cudaError != cudaSuccess) {
211 PrintCudaErrorInfo(cudaError,
"GetCudaCurGlobalMemorySize> Could not get CUDA device properties");
213 return deviceProp.totalGlobalMem;
219 cudaDeviceProp deviceProp;
220 cudaError_t cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
221 if (cudaError != cudaSuccess) {
222 PrintCudaErrorInfo(cudaError,
"GetCudaSMRegisterCount> Could not get CUDA device properties");
224 return deviceProp.regsPerBlock;
230 cudaDeviceProp deviceProp;
231 cudaError_t cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
232 if (cudaError != cudaSuccess) {
233 PrintCudaErrorInfo(cudaError,
"GetCudaSMCount> Could not get CUDA device properties");
235 return deviceProp.multiProcessorCount;
241 cudaDeviceProp deviceProp;
242 cudaError_t cudaError = cudaGetDeviceProperties(&deviceProp, curDevId);
243 if (cudaError != cudaSuccess) {
244 PrintCudaErrorInfo(cudaError,
"GetCudaIsDoublePrecisionSupported> Could not get CUDA device properties");
246 return deviceProp.major >= 2 || (deviceProp.major == 1 && deviceProp.minor >= 3);
int GetCudaCurSMRegisterCount()
returns the number of registers per block of currently selected cuda device
additional GPU exceptions
int GetCudaCurGlobalMemorySize()
returns global memory size of currently selected cuda device
void PrintCudaDeviceInfo()
prints some cuda device information to stdout
int GetCudaCurSMSharedMemorySize()
returns shared memory size per block of currently selected cuda device
bool GetCudaCurIsDoublePrecisionSupported()
returns whether currently selected cuda device supports double precision
int GetCudaCurSMCount()
returns the number of streaming multiprocessors of currently selected cuda device ...
#define LSST_EXCEPT(type,...)
int GetCudaCurDeviceId()
returns device ID of currently selected cuda device
Functions to query the properties of currently selected GPU device.
Include files required for standard LSST Exception handling.