LSSTApplications  10.0-2-g4f67435,11.0.rc2+1,11.0.rc2+12,11.0.rc2+3,11.0.rc2+4,11.0.rc2+5,11.0.rc2+6,11.0.rc2+7,11.0.rc2+8
LSSTDataManagementBasePackage
CudaSelectGpu.cc
Go to the documentation of this file.
1 // -*- LSST-C++ -*-
2 
3 /*
4  * LSST Data Management System
5  * Copyright 2008 - 2012 LSST Corporation.
6  *
7  * This product includes software developed by the
8  * LSST Project (http://www.lsst.org/).
9  *
10  * This program is free software: you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation, either version 3 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the LSST License Statement and
21  * the GNU General Public License along with this program. If not,
22  * see <http://www.lsstcorp.org/LegalNotices/>.
23  */
24 
35 #include "lsst/pex/exceptions.h"
39 
40 using namespace lsst::afw::gpu;
41 
42 #ifndef GPU_BUILD //if no GPU support, throw exceptions
43 
44 namespace lsst {
45 namespace afw {
46 namespace gpu {
47 namespace detail {
48 
49 
50 void SetCudaDevice(int devId) {
51  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with GPU support");
52 }
53 
55  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with GPU support");
56 }
57 
59  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with GPU support");
60 }
61 
63 {
64  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with gpu support");
65 }
67 {
68  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with gpu support");
69 }
71 {
72  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with gpu support");
73 }
74 bool TryToSelectCudaDevice(bool noExceptions, bool reselect)
75 {
76  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with gpu support");
77 }
79 {
80  throw LSST_EXCEPT(GpuRuntimeError, "AFW not built with gpu support");
81 }
82 
83 }
84 }
85 }
86 }
87 
88 #else
89 
90 #include <cuda.h>
91 #include <cuda_runtime.h>
92 #include <stdio.h>
93 #include <memory.h>
94 
95 
96 
97 namespace lsst {
98 namespace afw {
99 namespace gpu {
100 namespace detail {
101 
102 //from CudaQueryDevice.cc
103 void PrintCudaErrorInfo(cudaError_t cudaError, const char* errorStr);
104 
106 {
107  const char *devStr = getenv("CUDA_DEVICE");
108  if (devStr == NULL) return -2;
109  else return atoi(devStr);
110 }
111 
113 {
114  int devId = GetPreferredCudaDevice();
115 
116  //printf("DEVICE ID %d\n", devId);
117 
118  if (devId >= 0) {
119  cudaError_t err = cudaSetDevice(devId);
120  if (err != cudaSuccess) {
121  cudaGetLastError(); //clear error code
122  char errorStr[1000];
123  sprintf(errorStr, "Error selecting device %d:\n %s\n", devId, cudaGetErrorString(err));
124  throw LSST_EXCEPT(GpuRuntimeError, errorStr);
125  }
126  return true;
127  }
128 
129  if (devId != -2) return true;
130 
131  return false;
132 }
133 
134 cudaDeviceProp GetDesiredDeviceProperties()
135 {
136  cudaDeviceProp prop;
137  memset(&prop, 1, sizeof(prop));
138 
139  //min sm 1.3
140  prop.major = 1;
141  prop.minor = 3;
142 
143  prop.maxGridSize[0] = 128;
144  prop.maxThreadsDim[0] = 256;
145 
146  prop.multiProcessorCount = 2;
147  prop.clockRate = 700.0 * 1000 ; // 700 MHz
148  prop.warpSize = 32 ;
149  prop.sharedMemPerBlock = 32 * (1 << 10); //32 KiB
150  prop.regsPerBlock = 256 * 60 ;
151  prop.maxThreadsPerBlock = 256;
152  prop.totalGlobalMem = 500 * 1024 * 1024;
153 
154  return prop;
155 }
156 
158 {
159  int cudaDevicesN = 0;
160  cudaGetDeviceCount(&cudaDevicesN);
161  if (cudaDevicesN == 0) {
162  throw LSST_EXCEPT(GpuRuntimeError, "No CUDA capable GPUs found");
163  }
164 
165  cudaDeviceProp prop = GetDesiredDeviceProperties();
166  char errorStr[1000];
167 
168  int devId;
169  cudaError_t cudaError = cudaChooseDevice(&devId, &prop);
170  //printf("Error device %d:\n %s\n", devId, cudaGetErrorString(err));
171  if (cudaError != cudaSuccess) {
172  throw LSST_EXCEPT(GpuRuntimeError, "Error choosing device automatically");
173  }
174  cudaError = cudaSetDevice(devId);
175  if (cudaError == cudaErrorSetOnActiveProcess) {
176  cudaGetLastError(); //clear error
177  cudaGetDevice(&devId);
178  } else if (cudaError != cudaSuccess) {
179  cudaGetLastError(); //clear error
180  sprintf(errorStr, "Error automatically selecting device %d:\n %s\n",
181  devId, cudaGetErrorString(cudaError));
182  throw LSST_EXCEPT(GpuRuntimeError, errorStr);
183  }
184 }
185 
186 void VerifyCudaDevice()
187 {
188  cudaDeviceProp prop = GetDesiredDeviceProperties();
189  char errorStr[1000];
190 
191  int devId;
192  cudaError_t cudaError = cudaGetDevice(&devId);
193  if (cudaError != cudaSuccess) {
194  throw LSST_EXCEPT(GpuRuntimeError, "Could not get selected CUDA device ID");
195  }
196  cudaDeviceProp deviceProp;
197  cudaError = cudaGetDeviceProperties(&deviceProp, devId);
198  if (cudaError != cudaSuccess) {
199  throw LSST_EXCEPT(GpuRuntimeError, "Could not get CUDA device properties");
200  }
201  if (deviceProp.major < prop.major ||
202  (deviceProp.major == prop.major && deviceProp.minor < prop.minor)
203  ) {
204  sprintf(errorStr, "Only SM %d.%d or better GPU devices are currently allowed", prop.major, prop.minor);
205  throw LSST_EXCEPT(GpuRuntimeError, errorStr );
206  }
207 
208  if (deviceProp.major == prop.major && deviceProp.minor < prop.minor) {
209  if (deviceProp.totalGlobalMem < prop.totalGlobalMem) {
210  throw LSST_EXCEPT(GpuRuntimeError, "Not enough global memory on GPU");
211  }
212  }
213  if (deviceProp.sharedMemPerBlock < 16 * 1000) {
214  throw LSST_EXCEPT(GpuRuntimeError, "Not enough shared memory on GPU");
215  }
216  if (deviceProp.regsPerBlock < prop.regsPerBlock) {
217  throw LSST_EXCEPT(GpuRuntimeError, "Not enough registers per block available on GPU");
218  }
219  if (deviceProp.maxThreadsPerBlock < prop.maxThreadsPerBlock) {
220  throw LSST_EXCEPT(GpuRuntimeError, "Not enough threads per block available on GPU");
221  }
222 }
223 
// Selects a CUDA device at most once and remembers the outcome in two
// function-local statics.
//
//  noExceptions  when true, exceptions raised while selecting are caught here
//                and the function returns false; when false they propagate.
//  reselect      when true, the remembered outcome is discarded first.
//
// Returns true once a device has been selected, false otherwise.
224 bool TryToSelectCudaDevice(bool noExceptions, bool reselect)
225 {
// NOTE(review): this definition already sits in the #else branch of
// "#ifndef GPU_BUILD", so the "return false" branch below can never be compiled.
226 #if !defined(GPU_BUILD)
227  return false;
228 #else
// Remembered across calls: whether selection was attempted, and its result.
229  static bool isDeviceSelected = false;
230  static bool isDeviceOk = false;
231 
232  if (reselect){
233  isDeviceSelected = false;
234  isDeviceOk = false;
235  }
236 
// An earlier attempt (successful or not) short-circuits to its stored result.
237  if (isDeviceSelected)
238  return isDeviceOk;
// Marked as attempted before any work is done, so a failed attempt below is
// remembered as "not ok" until the caller passes reselect = true.
239  isDeviceSelected = true;
240 
241 
// First attempt: SelectPreferredCudaDevice(). A true result ends selection.
242  if (!noExceptions) {
243  bool done = SelectPreferredCudaDevice();
244  if (done) {
245  isDeviceOk = true;
246  return true;
247  }
248  } else {
249  try {
250  bool done = SelectPreferredCudaDevice();
251  if (done) {
252  isDeviceOk = true;
253  return true;
254  }
255  } catch(...) {
256  return false;
257  }
258  }
259 
// NOTE(review): the listing's embedded numbering skips 261-262 here, so the
// statements of this fallback step are missing from this view (presumably an
// automatic selection followed by a verification call) - confirm against the
// original file.
260  if (!noExceptions) {
263  isDeviceOk = true;
264  return true;
265  }
266 
// NOTE(review): the embedded numbering also skips 268-269; the body of this
// try block is missing from this view for the same reason.
267  try {
270  } catch(...) {
271  return false;
272  }
273 
274  isDeviceOk = true;
275  return true;
276 #endif
277 }
278 
279 
280 void SetCudaDevice(int devId)
281 {
282  cudaError_t cudaError = cudaSetDevice(devId);
283  if (cudaError != cudaSuccess) PrintCudaErrorInfo(cudaError, "SetCudaDevice> unsucessfull");
284 }
285 
286 void CudaReserveDevice()
287 {
288  int* dataGpu;
289  cudaError_t cudaError = cudaMalloc((void**)&dataGpu, 256 * sizeof(int));
290  if (cudaError != cudaSuccess) {
291  PrintCudaErrorInfo(cudaError, "CudaReserveDevice> Could not reserve device by calling cudaMalloc");
292  }
293  cudaError = cudaFree(dataGpu);
294  if (cudaError != cudaSuccess) {
295  PrintCudaErrorInfo(cudaError, "CudaReserveDevice> Could not release memory by calling cudaFree");
296  }
297 }
298 
299 void CudaThreadExit()
300 {
301  cudaThreadExit();
302 }
303 
304 }
305 }
306 }
307 } // namespace lsst::afw::gpu::detail ends
308 
309 #endif
310 
311 
additional GPU exceptions
bool TryToSelectCudaDevice(bool noExceptions, bool reselect=false)
void SetCudaDevice(int devId)
selects a cuda device
bool SelectPreferredCudaDevice()
void CudaThreadExit()
frees resources and releases current cuda device
Functions to help manage setup for GPU kernels.
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
void CudaReserveDevice()
reserves cuda device
Functions to query the properties of currently selected GPU device.
Include files required for standard LSST Exception handling.