20 #ifndef KJB_CUDA_UTIL_H
21 #define KJB_CUDA_UTIL_H
26 #include <boost/scoped_array.hpp>
27 #include <boost/shared_array.hpp>
// NOTE(review): this text appears to be an extracted source listing: every
// line carries its original line number, and the lines that held only braces
// or comments (and apparently a few statements) are not present. The comments
// added below describe only what the visible lines show.
//
// Cuda_utility_module derives from Cuda_base_module and keeps one CUfunction
// handle per kernel named ow_ew_multiply_* and detect_changes_*. It offers
// type-dispatched accessors for those handles and two host-side launch
// helpers (ow_ew_multiply, detect_changes) built on the CUDA driver API.
48 class Cuda_utility_module :
public Cuda_base_module
// Shorthand for the base class.
51 typedef Cuda_base_module Base;
// Constructor; only the declaration is visible here.
54 Cuda_utility_module();
// Looks up every kernel entry point by name in the module returned by
// get_handle_() and stores it in the matching *_func member.
56 void load_functions_()
// Helper macro: KJB_LOAD_FUNCTION__(x) expands to
//   cuModuleGetFunction(&x_func, get_handle_(), "x");
// NOTE(review): the expansion ends with a semicolon, so that semicolon becomes
// part of the CU_ETX(...) argument; whether this matters depends on how CU_ETX
// is defined, which is not visible here.
58 #define KJB_LOAD_FUNCTION__(a) cuModuleGetFunction(&a##_func, get_handle_(), #a);
59 CU_ETX(KJB_LOAD_FUNCTION__(ow_ew_multiply_float));
60 CU_ETX(KJB_LOAD_FUNCTION__(ow_ew_multiply_uint));
61 CU_ETX(KJB_LOAD_FUNCTION__(ow_ew_multiply_int));
62 CU_ETX(KJB_LOAD_FUNCTION__(ow_ew_multiply_uint_float));
63 CU_ETX(KJB_LOAD_FUNCTION__(detect_changes_float));
64 CU_ETX(KJB_LOAD_FUNCTION__(detect_changes_uint));
65 CU_ETX(KJB_LOAD_FUNCTION__(detect_changes_int));
// The helper macro is removed again once all handles are loaded.
66 #undef KJB_LOAD_FUNCTION__
// get_ow_ew_multiply_func overloads: the two unnamed arguments are never read;
// only their types select which kernel handle is returned.
// (unsigned int, float) -> ow_ew_multiply_uint_float_func
71 CUfunction get_ow_ew_multiply_func(
const unsigned int&,
const float&)
73 return ow_ew_multiply_uint_float_func;
// (float, float) -> ow_ew_multiply_float_func
76 CUfunction get_ow_ew_multiply_func(
const float&,
const float&)
78 return ow_ew_multiply_float_func;
// (unsigned int, unsigned int) -> ow_ew_multiply_uint_func
81 CUfunction get_ow_ew_multiply_func(
const unsigned int&,
const unsigned int&)
83 return ow_ew_multiply_uint_func;
// (int, int) -> ow_ew_multiply_int_func
86 CUfunction get_ow_ew_multiply_func(
const int&,
const int&)
88 return ow_ew_multiply_int_func;
// get_detect_changes_func overloads: same type-only dispatch with a single
// unnamed argument.
// float -> detect_changes_float_func
93 CUfunction get_detect_changes_func(
const float&)
95 return detect_changes_float_func;
// unsigned int -> detect_changes_uint_func
98 CUfunction get_detect_changes_func(
const unsigned int&)
100 return detect_changes_uint_func;
// int -> detect_changes_int_func
103 CUfunction get_detect_changes_func(
const int&)
105 return detect_changes_int_func;
// Launches the ow_ew_multiply kernel chosen by the template types T1 and T2.
//   v1, v2 : device pointers passed to the kernel as its first two arguments.
//   N      : passed to the kernel as its third argument and used to size the
//            grid (presumably the element count - TODO confirm against the
//            kernel source).
// NOTE(review): the declarations of ctx, offset and ptr and the assignment to
// threads are not present in this listing.
110 template <
class T1,
class T2>
111 void ow_ew_multiply(CUdeviceptr v1, CUdeviceptr v2,
unsigned int N)
// None of these three names is used in the visible lines of this method.
113 using boost::scoped_array;
114 using kjb_c::kjb_debug_level;
115 using kjb_c::add_error;
// NOTE(review): the result of cuCtxAttach is not checked, unlike the other
// driver calls in this method.
118 cuCtxAttach(&ctx, 0);
120 size_t blocks, threads;
// Number of blocks = N / threads, rounded up.
123 blocks = N / threads + (N % threads ? 1 : 0);
// NOTE(review): strict '<' rejects blocks == MAX_BLOCKS (65535); confirm that
// this is intended. If asserts are compiled out, nothing guards the grid size.
125 assert(blocks < MAX_BLOCKS);
// Default-constructed T1 and T2 values exist only to pick the overload.
128 CUfunction
function = get_ow_ew_multiply_func(T1(), T2());
// ALIGN_UP rounds 'offset' up to the next multiple of 'alignment' in place;
// the bit-mask form is only correct when 'alignment' is a power of two.
// NOTE(review): no #undef for this macro is visible, and it is defined again
// with the same body in detect_changes.
131 #define ALIGN_UP(offset, alignment) \
132 (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)
// Kernel argument 1: v1, passed as a pointer-sized value.
// NOTE(review): load_functions_ wraps driver calls in CU_ETX, whereas the
// calls below use ETX; confirm that ETX interprets a CUresult correctly.
139 ptr = (
void*)(
size_t) v1;
140 ALIGN_UP(offset, __alignof(ptr));
141 ETX(cuParamSetv(
function, offset, &ptr,
sizeof(ptr)));
142 offset +=
sizeof(ptr);
// Kernel argument 2: v2, passed the same way.
144 ptr = (
void*)(
size_t) v2;
145 ALIGN_UP(offset, __alignof(ptr));
146 ETX(cuParamSetv(
function, offset, &ptr,
sizeof(ptr)));
147 offset +=
sizeof(ptr);
// Kernel argument 3: N, set through cuParamSeti.
149 ALIGN_UP(offset, __alignof(N));
150 ETX(cuParamSeti(
function, offset, N));
// NOTE(review): no 'offset += sizeof(N)' is visible between cuParamSeti and
// cuParamSetSize (the listing skips a line here) - verify that the total
// parameter size includes N.
153 ETX(cuParamSetSize(
function, offset));
// One-dimensional launch: 'threads' threads per block, 'blocks' blocks.
155 ETX(cuFuncSetBlockShape(
function, threads, 1, 1));
156 ETX(cuLaunchGrid(
function, blocks, 1));
// NOTE(review): if any ETX above exits early (e.g. by throwing - TODO
// confirm), this detach is not reached.
160 ETX(cuCtxDetach(ctx));
// Launches the detect_changes kernel chosen by type T.
//   d_in : device pointer passed to the kernel as its first argument.
//   N    : passed to the kernel as its second argument and used to size the
//          grid (presumably the element count - TODO confirm).
// NOTE(review): T is used below but no template header is visible in this
// listing; presumably 'template <class T>' sat on a line that was dropped.
164 void detect_changes(CUdeviceptr d_in,
unsigned int N)
// None of these three names is used in the visible lines of this method.
166 using boost::scoped_array;
167 using kjb_c::kjb_debug_level;
168 using kjb_c::add_error;
// NOTE(review): the result of cuCtxAttach is not checked here either.
171 cuCtxAttach(&ctx, 0);
173 size_t blocks, threads;
// Number of blocks = N / threads, rounded up.
176 blocks = N / threads + (N % threads ? 1 : 0);
// NOTE(review): same strict '<' comparison as in ow_ew_multiply.
178 assert(blocks < MAX_BLOCKS);
// A default-constructed T exists only to pick the overload.
181 CUfunction
function = get_detect_changes_func(T());
// Same ALIGN_UP definition as in ow_ew_multiply (token-identical).
184 #define ALIGN_UP(offset, alignment) \
185 (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)
// Kernel argument 1: d_in, passed as a pointer-sized value.
190 ptr = (
void*)(
size_t) d_in;
191 ALIGN_UP(offset, __alignof(ptr));
192 ETX(cuParamSetv(
function, offset, &ptr,
sizeof(ptr)));
193 offset +=
sizeof(ptr);
// Kernel argument 2: N, set through cuParamSeti.
195 ALIGN_UP(offset, __alignof(N));
196 ETX(cuParamSeti(
function, offset, N));
// NOTE(review): as in ow_ew_multiply, no 'offset += sizeof(N)' is visible
// before cuParamSetSize.
199 ETX(cuParamSetSize(
function, offset));
// One-dimensional launch: 'threads' threads per block, 'blocks' blocks.
201 ETX(cuFuncSetBlockShape(
function, threads, 1, 1));
202 ETX(cuLaunchGrid(
function, blocks, 1));
// NOTE(review): not reached if an earlier ETX exits early - TODO confirm.
206 ETX(cuCtxDetach(ctx));
// Kernel handles filled in by load_functions_().
211 CUfunction ow_ew_multiply_uint_float_func;
213 CUfunction ow_ew_multiply_float_func;
214 CUfunction ow_ew_multiply_uint_func;
215 CUfunction ow_ew_multiply_int_func;
217 CUfunction detect_changes_float_func;
218 CUfunction detect_changes_uint_func;
219 CUfunction detect_changes_int_func;
// Launch limits. MAX_BLOCKS is the bound checked by the asserts above;
// MAX_THREADS is not referenced in the visible lines (presumably it is the
// value assigned to 'threads' - TODO confirm).
224 static const size_t MAX_THREADS = 512;
225 static const size_t MAX_BLOCKS = 65535;
#define ETX(a)
Definition: l_exception.h:67
Support for error handling exception classes in libKJB.
Definition for the Vector class, a thin wrapper on the KJB Vector struct and its related functionality.