OpenMPCD
Macros.hpp
Go to the documentation of this file.
1 /**
2  * @file
3  * Defines preprocessor macros to make better use of CUDA.
4  */
5 
6 #ifndef OPENMPCD_CUDA_MACROS_HPP
7 #define OPENMPCD_CUDA_MACROS_HPP
8 
10 
#if defined(__CUDACC__)
    /**
     * Marks a function as callable from the Host only.
     */
    #define OPENMPCD_CUDA_HOST __host__

    /**
     * Marks a function as callable from a CUDA Device only.
     */
    #define OPENMPCD_CUDA_DEVICE __device__

    /**
     * Marks a function as callable from the Host as well as from a CUDA Device.
     */
    #define OPENMPCD_CUDA_HOST_AND_DEVICE __host__ __device__

    /**
     * Marks a function as a CUDA kernel.
     */
    #define OPENMPCD_CUDA_GLOBAL __global__
#else
    /*
     * `__CUDACC__` is not defined, so the CUDA execution space specifiers are
     * not used here: every qualifier macro expands to nothing, which lets
     * annotated declarations be compiled as ordinary C++.
     */
    #define OPENMPCD_CUDA_HOST
    #define OPENMPCD_CUDA_DEVICE
    #define OPENMPCD_CUDA_HOST_AND_DEVICE
    #define OPENMPCD_CUDA_GLOBAL
#endif
37 
/**
 * Helper to be used prior to a CUDA kernel call.
 *
 * This opens a scope containing a loop; the code following the macro becomes the
 * loop body, and the scope has to be closed with OPENMPCD_CUDA_LAUNCH_WORKUNITS_END.
 * Inside the loop body, the following `const unsigned int` variables are defined:
 *  - `gridSize`:       the number of blocks in the grid for this kernel call,
 *                      which is at least 1 and at most `maxGridSize_`;
 *  - `blockSize`:      the number of threads in a block, equal to `blockSize_`;
 *  - `workUnitOffset`: the number of work units that have been dispatched in
 *                      previous iterations, which can be passed as an argument
 *                      to the CUDA kernel.
 *
 * Since there may be so many work units that one grid cannot handle them all, the
 * kernel call is looped over; each iteration covers at most
 * `maxGridSize_ * blockSize_` work units. The grid size is the number of remaining
 * work units divided by the block size, rounded up, and capped at `maxGridSize_`.
 * In the last iteration, `gridSize * blockSize` may therefore exceed the number of
 * remaining work units, so the kernel has to check its work unit index against the
 * total number of work units.
 *
 * Each macro argument is evaluated exactly once every time control passes through
 * the macro, so run-time values that change between passes are honoured.
 * The loop counter never advances past `numberOfWorkUnits_`, so it cannot wrap
 * around, even if the number of work units is close to the largest value
 * representable in an `unsigned int`.
 *
 * If no work units are requested, the loop body is not executed at all.
 *
 * @warning `maxGridSize_` and `blockSize_` must both be non-zero, and their product
 *          must be representable in an `unsigned int`; this is not checked.
 *
 * @see OPENMPCD_CUDA_LAUNCH_WORKUNITS_END
 * @param[in] numberOfWorkUnits_ The number of work units that need to be dispatched.
 * @param[in] maxGridSize_       The maximum number of blocks to dispatch in a grid.
 * @param[in] blockSize_         The number of threads to dispatch per block.
 */
#define OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN(numberOfWorkUnits_, maxGridSize_, blockSize_) \
    { \
        const unsigned int _numberOfWorkUnits = (numberOfWorkUnits_); \
        const unsigned int _maxGridSize = (maxGridSize_); \
        const unsigned int _blockSize = (blockSize_); \
        const unsigned int _workUnitsPerLaunch = _maxGridSize * _blockSize; \
        for( \
            unsigned int _workUnit = 0; \
            _workUnit < _numberOfWorkUnits; \
            _workUnit += \
                (_numberOfWorkUnits - _workUnit < _workUnitsPerLaunch \
                    ? _numberOfWorkUnits - _workUnit \
                    : _workUnitsPerLaunch)) \
        { \
            const unsigned int _remainingWorkUnits = _numberOfWorkUnits - _workUnit; \
            const unsigned int _requiredGridSize = \
                _remainingWorkUnits / _blockSize + (_remainingWorkUnits % _blockSize != 0 ? 1 : 0); \
            \
            const unsigned int gridSize = _requiredGridSize > _maxGridSize ? _maxGridSize : _requiredGridSize; \
            const unsigned int blockSize = _blockSize; \
            const unsigned int workUnitOffset = _workUnit;
65 
/**
 * Helper to be used after a CUDA kernel call.
 *
 * Expands to two closing braces, which terminate the loop body and the
 * enclosing scope, in that order.
 *
 * @see OPENMPCD_CUDA_LAUNCH_WORKUNITS_BEGIN
 */
#define OPENMPCD_CUDA_LAUNCH_WORKUNITS_END \
    } /* end of the loop body */ \
    } /* end of the enclosing scope */
73 
/**
 * Shorthand for OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN that fixes blockSize_
 * to its default value of 512, while the maximum grid size is chosen by the caller.
 *
 * @see OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN
 * @param[in] numberOfWorkUnits_ The number of work units that need to be dispatched.
 * @param[in] maxGridSize_       The maximum number of blocks to dispatch in a grid.
 */
#define OPENMPCD_CUDA_LAUNCH_WORKUNITS_GRIDSIZE_BEGIN(numberOfWorkUnits_, maxGridSize_) \
    OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN( \
        (numberOfWorkUnits_), \
        (maxGridSize_), \
        512)
83 
/**
 * Shorthand for OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN that uses the default
 * values for both maxGridSize_ (1024) and blockSize_ (512).
 *
 * @see OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN
 * @param[in] numberOfWorkUnits_ The number of work units that need to be dispatched.
 */
#define OPENMPCD_CUDA_LAUNCH_WORKUNITS_BEGIN(numberOfWorkUnits_) \
    OPENMPCD_CUDA_LAUNCH_WORKUNITS_SIZES_BEGIN( \
        (numberOfWorkUnits_), \
        1024, \
        512)
92 
/**
 * Throws if the last CUDA call was not successful.
 *
 * The status returned by `cudaGetLastError()` is compared to `cudaSuccess`. If
 * they differ, `OPENMPCD_THROW` is invoked with the exception type
 * `OpenMPCD::CUDA::Exception` and the message returned by `cudaGetErrorString`
 * for that status.
 *
 * The expansion is wrapped in `do { ... } while(0)`, so the macro behaves like a
 * single statement and has to be followed by a semicolon.
 */
#define OPENMPCD_CUDA_THROW_ON_ERROR \
    do \
    { \
        const cudaError_t openmpcdCudaStatus = cudaGetLastError(); \
        if(openmpcdCudaStatus != cudaSuccess) \
            OPENMPCD_THROW(OpenMPCD::CUDA::Exception, cudaGetErrorString(openmpcdCudaStatus)); \
    } \
    while(0)
97 
98 #endif
Exceptions.hpp