#ifndef bits_gpulog_log_h__
#define bits_gpulog_log_h__

namespace gpulog
{
namespace internal
{
    // Global atomic-add wrappers. The __device__ flavor forwards to CUDA's
    // atomicAdd(); the __host__ flavor performs a plain, non-atomic add,
    // which suffices for single-threaded host-side logging.
    __device__ static inline int global_atomicAdd(int *x, int add)
    {
        return atomicAdd(x, add);
    }

    __host__ static inline int global_atomicAdd(int *x, int add)
    {
        int tmp = *x;
        *x += add;
        return tmp;
    }
    /* Device internals: allocation, data transfer and atomics backed by
       GPU memory through the CUDA runtime API. */
    struct dev_internals
    {
        template<typename T>
        __host__ static void alloc(T* &ret, int num = 1)
        {
            cudaMalloc((void **)&ret, num*sizeof(T));
        }

        template<typename T>
        __host__ static const T get(T *ptr)
        {
            T ret;
            cudaMemcpy(&ret, ptr, sizeof(ret), cudaMemcpyDeviceToHost);
            return ret;
        }

        template<typename T>
        __host__ static void set(T *ptr, const T& val)
        {
            cudaMemcpy(ptr, &val, sizeof(*ptr), cudaMemcpyHostToDevice);
        }

        template<typename T>
        __host__ static void dealloc(T* p, int num = 1)
        {
            cudaFree(p);
        }

        // linear ID of the calling thread within the entire grid
        __device__ static inline int threadId()
        {
            return ((blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
        }
        __device__ static inline int atomicAdd(int *x, int add)
        {
            return global_atomicAdd(x, add);
        }

        __host__ static inline int atomicAdd(int *x, int add)
        {
            return global_atomicAdd(x, add);
        }
    };
    /* Host internals: the same interface as dev_internals, backed by
       ordinary heap memory and non-atomic operations. */
    struct host_internals
    {
        template<typename T>
        static void alloc(T* &ret, int num = 1)
        {
            ret = num == 1 ? new T : new T[num];
        }

        template<typename T>
        static const T get(T *ptr)
        {
            return *ptr;
        }

        template<typename T>
        static void set(T *ptr, const T& val)
        {
            *ptr = val;
        }

        template<typename T>
        static void dealloc(T* p, int num = 1)
        {
            if(num == 1)
                delete p;
            else
                delete [] p;
        }

        static inline int atomicAdd(int *x, int add)
        {
            int tmp = *x;
            *x += add;
            return tmp;
        }

        static int threadId() { return -1; }
    };
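    /* The two structs above implement one "internals" policy interface
       (alloc/get/set/dealloc/atomicAdd/threadId) against device and host
       memory, respectively. log_base<A>, below, is written purely in terms
       of this interface. A minimal sketch of how such a policy is consumed
       (reserve_bytes is illustrative, not part of this header):

           template<typename A>
           int reserve_bytes(int *counter, int nbytes)
           {
               // CUDA atomicAdd() under dev_internals,
               // a plain add under host_internals
               return A::atomicAdd(counter, nbytes);
           }
    */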
    /* Lightweight typed pointer wrapper; converts implicitly back to T*. */
    template<typename T>
    struct ptr_t
    {
        T *ptr;

        __host__ __device__ inline ptr_t(T *p) : ptr(p) {}
        __host__ __device__ operator T*() const { return ptr; }
    };

    #define PTR_T(T) gpulog::internal::ptr_t<T>
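    /* ptr_t exists so that write(), below, can return a typed pointer to
       the just-written payload. Thanks to the conversion operator, callers
       can assign the result straight to a raw pointer (sketch; dlog and
       MSG_EVENT are illustrative names):

           float *payload = dlog.write(MSG_EVENT, tid, t, 0.f);
    */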
    /* The log itself, parametrized by an internals policy A
       (dev_internals or host_internals). */
    template<typename A>
    struct log_base
    {
    protected:
        char *buffer;   // the log buffer
        int *at;        // next free byte offset within the buffer
        int buf_len;    // buffer capacity, in bytes

    public:
        __host__ void alloc(size_t len)
        {
            buf_len = len;

            A::alloc(at, 1);
            A::set(at, 0);
            A::alloc(buffer, len);

            DHOST( std::cerr << "Allocated " << len << " bytes.\n"; )
        }

        __host__ void free()
        {
            A::dealloc(buffer, buf_len);
            A::dealloc(at);

            buffer = NULL; buf_len = 0;
        }
        __host__ __device__ int capacity() const
        {
            return buf_len;
        }

        __host__ __device__ int size() const
        {
            return *at;     // meaningful only in the log's own memory space
        }

        // fetch the current size via the policy (usable from the host
        // even when the counter lives in device memory)
        __host__ int fetch_size() const
        {
            return A::get(at);
        }

        __host__ __device__ void seek(int pos) const
        {
            A::set(at, pos);
        }

        __host__ char *internal_buffer() const
        {
            return buffer;
        }

        __host__ __device__ bool has_overflowed(int idx)
        {
            return idx > buf_len;
        }
        /* Write a packet consisting of the header plus three arguments to
           the log. header, SCALAR() and pktsize<> come from the companion
           gpulog headers. */
        template<typename T1, typename T2, typename T3>
        __device__ inline PTR_T(SCALAR(T3)) write(const int msgid, const T1 &v1, const T2 &v2, const T3 &v3)
        {
            typedef internal::pktsize<header, T1, T2, T3> P;

            // reserve room for the packet in the buffer
            int len = P::len_with_padding(v3);
            int at = A::atomicAdd(this->at, len);
            char *ptr = buffer + at;

            // write the packet header and the data
            header v0(msgid, len);
            P::IO0::put(ptr, v0, P::begin0, P::len0);
            P::IO1::put(ptr, v1, P::begin1, P::len1);
            P::IO2::put(ptr, v2, P::begin2, P::len2);
            P::IO3::put(ptr, v3, P::begin3, P::len3);

            // store the array information
            P::store_arginfo(ptr, v3);

            DHOST( std::cerr << "Total packet len = " << len << "\n"; )
            return (SCALAR(T3)*)(ptr + P::begin3);
        }
        // write() overloads for other argument counts (generated)
        #include "gpulog_write.h"
    };

    typedef log_base<dev_internals> device_log;
    typedef log_base<host_internals> host_log;
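    /* Kernel-side usage sketch (illustrative; dlog, MSG_EVENT and the
       kernel are hypothetical, not defined by this header). Assumes the
       device_log descriptor was uploaded to the __device__ symbol dlog
       with upload_device_log(), below:

           __device__ gpulog::internal::device_log dlog;

           static const int MSG_EVENT = 1;

           __global__ void record_events(float t)
           {
               int tid = gpulog::internal::dev_internals::threadId();

               // packs a header plus the three arguments; returns a
               // pointer to the last argument's slot in the packet
               dlog.write(MSG_EVENT, tid, t, 0.f);
           }
    */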
    /* Download the log descriptor pointed to by dlog into log */
    inline void download_device_log(device_log &log, device_log *dlog)
    {
        cudaMemcpy(&log, dlog, sizeof(log), cudaMemcpyDeviceToHost);
    }
    /* Download the log descriptor stored in __device__ symbol name into log */
    inline void download_device_log(device_log &log, const char *name)
    {
        cudaMemcpyFromSymbol(&log, name, sizeof(log), 0, cudaMemcpyDeviceToHost);
    }
    /* Upload the log descriptor log into __device__ symbol name */
    inline void upload_device_log(const char *name, device_log &log)
    {
        cudaMemcpyToSymbol(name, &log, sizeof(log), 0, cudaMemcpyHostToDevice);
    }
    /* Upload the log descriptor to newly allocated device memory,
       returning the device pointer */
    inline device_log* upload_device_log(device_log &log)
    {
        void *pdlog;
        cudaMalloc(&pdlog, sizeof(log));
        cudaMemcpy(pdlog, &log, sizeof(log), cudaMemcpyHostToDevice);
        return (device_log*) pdlog;
    }
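    /* Host-side setup sketch (illustrative): allocate a device log and
       publish its descriptor either to a named __device__ symbol or to
       freshly allocated device memory. The symbol name "dlog" is an
       assumption carried over from the kernel sketch above:

           gpulog::internal::device_log dlog;
           dlog.alloc(16*1024*1024);            // 16MB buffer on the device

           // variant A: publish to the __device__ symbol "dlog"
           gpulog::internal::upload_device_log("dlog", dlog);

           // variant B: obtain a device pointer to a descriptor copy
           gpulog::internal::device_log *pdlog =
                   gpulog::internal::upload_device_log(dlog);
    */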
    /* Copy the contents of device log from into host log to */
    __host__ inline void copy(host_log &to, device_log &from, int flags = 0)
    {
        int size = from.fetch_size();
        if(size == 0) { return; }

        // resize the host log if the capacities don't match
        if(to.capacity() != from.capacity())
        {
            to.free();
            to.alloc(from.capacity());
        }

        cudaMemcpy(to.internal_buffer(), from.internal_buffer(), size, cudaMemcpyDeviceToHost);
    }
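    /* Flush sketch (illustrative): after the kernels have run, pull the
       accumulated records over to a host_log. record_events and pdlog are
       the hypothetical names from the sketches above:

           gpulog::internal::host_log hlog;
           hlog.alloc(16*1024*1024);

           record_events<<<blocks, threads>>>(t);
           cudaDeviceSynchronize();

           gpulog::internal::copy(hlog, "dlog");   // by symbol name, or
           gpulog::internal::copy(hlog, pdlog);    // by device pointer
    */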
    /* Copy from the device log stored in __device__ symbol from */
    inline void copy(host_log &to, const char *from, int flags = 0)
    {
        device_log dlog;
        download_device_log(dlog, from);
        copy(to, dlog, flags);
    }
    /* Copy from the device log pointed to by from */
    inline void copy(host_log &to, device_log *from, int flags = 0)
    {
        device_log dlog;
        download_device_log(dlog, from);
        copy(to, dlog, flags);
    }
    /* Allocate a device log of len bytes and store its descriptor
       into __device__ symbol symbol */
    inline device_log alloc_device_log(const char *symbol, size_t len)
    {
        device_log dlog;
        dlog.alloc(len);
        upload_device_log(symbol, dlog);
        return dlog;
    }
    /* Allocate a device log of len bytes, returning a device pointer
       to its descriptor */
    inline device_log* alloc_device_log(size_t len)
    {
        device_log dlog;
        dlog.alloc(len);
        return upload_device_log(dlog);
    }
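    /* Lifecycle pairing sketch (illustrative): alloc_device_log() and
       free_device_log() (below) manage a log published through a
       __device__ symbol:

           using namespace gpulog::internal;

           alloc_device_log("dlog", 8*1024*1024);  // allocate + publish
           // ... launch kernels, then copy(hlog, "dlog") ...
           free_device_log("dlog");                // download + free
    */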
    /* Free the device log whose descriptor is stored in __device__ symbol symbol */
    inline void free_device_log(const char *symbol)
    {
        device_log dlog;
        download_device_log(dlog, symbol);
        dlog.free();
    }

} // namespace internal
} // namespace gpulog

#endif // bits_gpulog_log_h__