Tissue Forge C++ 0.2.1
Interactive, particle-based physics, chemistry and biology modeling and simulation environment
Loading...
Searching...
No Matches
tfRunner_cuda.h
1/*******************************************************************************
2 * This file is part of mdcore.
3 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4 * Copyright (c) 2022-2024 T.J. Sego
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20#ifndef _MDCORE_SOURCE_TFRUNNER_CUDA_H_
21#define _MDCORE_SOURCE_TFRUNNER_CUDA_H_
22
23#include <tfTask.h>
24
25#include <curand_kernel.h>
26
27
/* Compile-time sizes and tuning constants for the CUDA runner. */

/* Max number of parts for shared buffers. */
#define cuda_maxparts       512

#define cuda_maxdiags       352

/* Evaluates to n * (n - 1) / 2 for n = cuda_maxdiags. */
#define cuda_ndiags         (((cuda_maxdiags - 1) * cuda_maxdiags) / 2)

#define cuda_frame          32
#define cuda_maxpots        100
#define max_fingers         1

/* NOTE(review): presumably the default number of threads per block -- confirm against the launch code. */
#define cuda_defthreads     128

#define cuda_memcpy_chunk   6
#define cuda_sum_chunk      3
#define cuda_maxqueues      30
39
40
41/* Some flags that control optional behaviour */
42// #define TIMERS
43
44
/*
 * Timer identifiers.
 *
 * NOTE(review): presumably these are the values passed as `tid` to the
 * TIMER_TOC* macros to select a slot of `cuda_timers` -- confirm at the
 * call sites.
 */
enum {
    tid_mutex      = 0,
    tid_queue      = 1,
    tid_gettask    = 2,
    tid_memcpy     = 3,
    tid_update     = 4,
    tid_pack       = 5,
    tid_sort       = 6,
    tid_pair       = 7,
    tid_self       = 8,
    tid_potential  = 9,
    tid_potential4 = 10,
    tid_total      = 11,

    /* Equals the number of identifiers listed above. */
    tid_count      = 12
};
61
62
/*
 * Timer functions.
 *
 * With TIMERS defined, each TIC macro stores clock() into a local variable
 * on thread 0 of the block, and each TOC macro reads clock() again and has
 * thread 0 atomically add the elapsed count to cuda_timers[tid].
 *
 *   - TIMER_TIC / TIMER_TOC(tid)       declare `tic` / `toc` themselves.
 *   - TIMER_TIC_ND / TIMER_TOC_ND(tid) ("no declaration") reuse `tic` / `toc`
 *                                      variables already in scope.
 *   - The *2 variants do the same with a second pair, `tic2` / `toc2`.
 *
 * `cuda_timers` is not declared in this header; it must be visible wherever
 * the macros are expanded.
 *
 * Without TIMERS every macro expands to nothing. All eight macros are
 * defined in both branches so that code using any of them compiles
 * regardless of whether TIMERS is set.
 */
#ifdef TIMERS
    #define TIMER_TIC_ND if(threadIdx.x == 0) tic = clock();
    #define TIMER_TOC_ND(tid) toc = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc > tic) ? (toc - tic) :(toc + (0xffffffff - tic)));
    #define TIMER_TIC clock_t tic; if(threadIdx.x == 0) tic = clock();
    #define TIMER_TOC(tid) clock_t toc = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc > tic) ? (toc - tic) :(toc + (0xffffffff - tic)));
    #define TIMER_TIC2_ND if(threadIdx.x == 0) tic2 = clock();
    #define TIMER_TOC2_ND(tid) toc2 = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc2 > tic2) ? (toc2 - tic2) :(toc2 + (0xffffffff - tic2)));
    #define TIMER_TIC2 clock_t tic2; if(threadIdx.x == 0) tic2 = clock();
    #define TIMER_TOC2(tid) clock_t toc2 = clock(); if(threadIdx.x == 0) atomicAdd(&cuda_timers[tid],(toc2 > tic2) ? (toc2 - tic2) :(toc2 + (0xffffffff - tic2)));
#else
    #define TIMER_TIC_ND
    #define TIMER_TOC_ND(tid)
    #define TIMER_TIC
    #define TIMER_TOC(tid)
    #define TIMER_TIC2_ND
    #define TIMER_TOC2_ND(tid)
    #define TIMER_TIC2
    #define TIMER_TOC2(tid)
#endif
81
82
83namespace TissueForge::cuda {
84
85
87 struct queue_cuda {
88
89 /* Indices to the first and last elements. */
90 int first, last;
91
92 /* Number of elements in this queue. */
93 volatile int count;
94
95 /* Number of elements in the recycled list. */
96 volatile int rec_count;
97
98 /* The queue data. */
99 volatile int *data;
100
101 /* The recycling list. */
102 volatile int *rec_data;
103
104 };
105
106
108 struct task_cuda {
109
111 short int type, subtype;
112
114 volatile int wait;
115
117 int flags;
118
120 int i, j;
121
124
126 int unlock[ task_max_unlock ];
127
128 };
129
144
145 __device__ void potential_eval_r_cuda(struct TissueForge::Potential *p, FPTYPE r, FPTYPE *e, FPTYPE *f);
146
161 __device__
162 void potential_eval_cuda(struct TissueForge::Potential *p, float r2, float *e, float *f);
163
164};
165
166#endif // _MDCORE_SOURCE_TFRUNNER_CUDA_H_
Tissue Forge GPU acceleration on CUDA-supporting devices.
Definition tfAngleConfig.h:26
__device__ void potential_eval_cuda(struct TissueForge::Potential *p, float r2, float *e, float *f)
Evaluates the given potential at the given point (interpolated).
__device__ void potential_eval_r_cuda(struct TissueForge::Potential *p, FPTYPE r, FPTYPE *e, FPTYPE *f)
Evaluates the given potential at the given point (interpolated).
A Potential object is a compiled interpolation of a given function. The Universe applies potentials t...
Definition tfPotential.h:213
Definition tfRunner_cuda.h:87
Definition tfRunner_cuda.h:108
int i
Definition tfRunner_cuda.h:120
volatile int wait
Definition tfRunner_cuda.h:114
int unlock[124]
Definition tfRunner_cuda.h:126
short int type
Definition tfRunner_cuda.h:111
int flags
Definition tfRunner_cuda.h:117
int nr_unlock
Definition tfRunner_cuda.h:123