33 namespace swarm {
namespace gpu {
namespace bppt {
65 const static int nbod = T::n;
66 const static int body_count = nbod;
67 const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
73 ensemble::SystemRef& sys;
99 GPUAPI
void sum(
int b,
int c,
double& acc,
double & jerk)
const{
112 for(
int bb=nbod-1;bb>=0;--bb)
114 if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
117 double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
119 double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
121 double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
125 shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
129 if( (sys[bb].mass()>0.0) && (b!=bb) )
132 double dx = sys[bb][
c].pos()- sys[b][
c].pos();
134 double dv = sys[bb][
c].vel()- sys[b][
c].vel();
135 acc += dx* shared[b].acc();
136 jerk += (dv - dx * shared[b].jerk() ) * shared[b].acc();
152 GPUAPI
double sum_acc(
int b,
int c)
const{
166 for(
int bb=nbod-1;bb>=0;--bb)
168 if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
171 double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
173 double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
175 double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
179 shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
183 if( (sys[bb].mass()>0.0) && (b!=bb) )
186 double dx = sys[bb][
c].pos()- sys[b][
c].pos();
188 double dv = sys[bb][
c].vel()- sys[b][
c].vel();
189 acc += dx* shared[b].acc();
208 GPUAPI
double sum_acc_planets(
int b,
int c)
const{
221 for(
int bb=nbod-1;bb>=1;--bb)
223 if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
226 double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
228 double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
230 double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
234 shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
238 if( (sys[bb].mass()>0.0) && (b!=bb) )
241 double dx = sys[bb][
c].pos()- sys[b][
c].pos();
243 double dv = sys[bb][
c].vel()- sys[b][
c].vel();
244 acc += dx* shared[b].acc();
270 GPUAPI
void operator() (
int ij,
int b,
int c,
double& pos,
double& vel,
double& acc,
double& jerk)
const{
272 if(b < nbod && c < 3)
273 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
275 if(b < nbod && c < 3){
281 __device__
double acc (
int ij,
int b,
int c,
double& pos,
double& vel)
const{
283 if(b < nbod && c < 3)
284 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
286 if(b < nbod && c < 3){
293 __device__
double acc_planets (
int ij,
int b,
int c,
double& pos,
double& vel)
const{
295 if(b < nbod && c < 3)
296 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
298 if(b < nbod && c < 3){
299 return sum_acc_planets(b,c);
305 static GENERIC int shmem_per_system() {
306 const int body_count = nbod;
307 return body_count *
sizeof(GravitationAccJerkScalars<CHUNK_SIZE>)/CHUNK_SIZE;
310 static GPUAPI
void * system_shared_data_pointer(
const int sysid_in_block) {
311 extern __shared__
char shared_mem[];
312 int b = sysid_in_block / CHUNK_SIZE ;
313 int i = sysid_in_block % CHUNK_SIZE ;
314 int idx = i *
sizeof(double)
316 * shmem_per_system();
317 return &shared_mem[idx];
322 extern __shared__
char shared_mem[];
323 int b = system_per_block / CHUNK_SIZE ;
324 int i = system_per_block % CHUNK_SIZE ;
326 int idx = b * CHUNK_SIZE * shmem_per_system();
327 return &shared_mem[idx];
330 static GENERIC int thread_per_system(){
334 static GENERIC int shmem_per_system() {
335 return sizeof(shared_data)/CHUNK_SIZE;