20#ifndef _MDCORE_SOURCE_TFRUNNER_CUDA_H_
21#define _MDCORE_SOURCE_TFRUNNER_CUDA_H_
25#include <curand_kernel.h>
// Compile-time size limits and tuning constants for the CUDA runner.
// NOTE(review): the number fused to the front of each directive (29, 30, ...)
// appears to be the listing's own line number, not part of the header text.
// NOTE(review): the meanings below are inferred from the macro names only;
// confirm against the code that uses them.

// Presumably an upper bound on a particle count (per cell?) — TODO confirm.
29#define cuda_maxparts 512
// Presumably an upper bound on the number of "diagonal" entries — TODO confirm.
30#define cuda_maxdiags 352
// n*(n-1)/2 with n = cuda_maxdiags, i.e. the number of unordered pairs of
// cuda_maxdiags items (61776 for n = 352). The division is exact because the
// product of two consecutive integers is always even.
31#define cuda_ndiags (((cuda_maxdiags - 1) * cuda_maxdiags) / 2)
// Presumably an upper bound on the number of potentials — TODO confirm.
33#define cuda_maxpots 100
// Presumably the default number of threads per block — TODO confirm.
35#define cuda_defthreads 128
// Chunk sizes, presumably for the device-side copy and sum helpers — TODO confirm units.
36#define cuda_memcpy_chunk 6
37#define cuda_sum_chunk 3
// Presumably an upper bound on the number of task queues — TODO confirm.
38#define cuda_maxqueues 30
// Block-level timing helpers built on clock().
//
// TIC macros: the thread with threadIdx.x == 0 stores clock() in `tic`
// (or `tic2`).
// TOC macros: every thread samples clock() into `toc` (or `toc2`), but only
// the thread with threadIdx.x == 0 atomically adds the elapsed count to
// cuda_timers[tid]. `cuda_timers` is not defined in this listing.
//
// The plain variants declare their clock_t variable themselves; the _ND
// variants only assign, so `tic`/`toc` (or `tic2`/`toc2`) must already be
// declared in the enclosing scope.
//
// Elapsed count: (toc - tic) when toc > tic, otherwise
// toc + (0xffffffff - tic), i.e. the counter is treated as having wrapped.
// NOTE(review): for a 32-bit wrap the exact difference is toc - tic + 2^32,
// so this form is one tick short — confirm whether that is intended.
//
// NOTE(review): each macro expands to more than one statement and is not
// wrapped in do { } while(0); using one as the sole body of an unbraced
// if/else would guard only its first statement.
65 #define TIMER_TIC_ND if(threadIdx.x == 0) tic = clock();
66 #define TIMER_TOC_ND(tid) toc = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc > tic) ? (toc - tic) :(toc + (0xffffffff - tic)));
67 #define TIMER_TIC clock_t tic; if(threadIdx.x == 0) tic = clock();
68 #define TIMER_TOC(tid) clock_t toc = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc > tic) ? (toc - tic) :(toc + (0xffffffff - tic)));
// Second, independent timer using tic2/toc2, so two intervals can be measured
// in the same scope.
69 #define TIMER_TIC2_ND if(threadIdx.x == 0) tic2 = clock();
70 #define TIMER_TOC2_ND(tid) toc2 = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc2 > tic2) ? (toc2 - tic2) :(toc2 + (0xffffffff - tic2)));
71 #define TIMER_TIC2 clock_t tic2; if(threadIdx.x == 0) tic2 = clock();
72 #define TIMER_TOC2(tid) clock_t toc2 = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc2 > tic2) ? (toc2 - tic2) :(toc2 + (0xffffffff - tic2)));
// Empty (no-op) definitions of the TOC macros.
// NOTE(review): the listing numbers jump (72 -> 75 -> 77 -> 79), so these
// presumably sit in the alternate branch of a preprocessor conditional whose
// #if/#else/#endif lines, and the matching empty TIC definitions, are not
// shown here — confirm against the full header.
75 #define TIMER_TOC_ND(tid)
77 #define TIMER_TOC(tid)
79 #define TIMER_TOC2(tid)
96 volatile int rec_count;
102 volatile int *rec_data;
Tissue Forge GPU acceleration on CUDA-supporting devices.
Definition tfAngleConfig.h:26
__device__ void potential_eval_cuda(struct TissueForge::Potential *p, float r2, float *e, float *f)
Evaluates the given potential at the given point (interpolated).
__device__ void potential_eval_r_cuda(struct TissueForge::Potential *p, FPTYPE r, FPTYPE *e, FPTYPE *f)
Evaluates the given potential at the given point (interpolated).
A Potential object is a compiled interpolation of a given function. The Universe applies potentials to particles to calculate the net force on them.
Definition tfPotential.h:213
Definition tfRunner_cuda.h:87
Definition tfRunner_cuda.h:108
int unlock[task_max_unlock]
Definition tfRunner_cuda.h:126
int i
Definition tfRunner_cuda.h:120
volatile int wait
Definition tfRunner_cuda.h:114
short int type
Definition tfRunner_cuda.h:111
int flags
Definition tfRunner_cuda.h:117
int nr_unlock
Definition tfRunner_cuda.h:123