34 namespace swarm {
namespace gpu {
namespace bppt {
68 const static int nbod = T::n;
69 const static int pair_count = (nbod*(nbod-1))/2;
70 const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
77 ensemble::SystemRef& sys;
102 GENERIC void calc_pair(
int ij_first )
const{
103 for(
int ij = ij_first ; ij < std::max(3*nbod, pair_count) ; ij += 3*nbod )
105 int i = first<nbod>( ij );
106 int j = second<nbod>( ij );
110 double dx[3] = { sys[j][0].pos()- sys[i][0].pos(),sys[j][1].pos()- sys[i][1].pos(), sys[j][2].pos()- sys[i][2].pos() };
112 double dv[3] = { sys[j][0].vel()- sys[i][0].vel(),sys[j][1].vel()- sys[i][1].vel(), sys[j][2].vel()- sys[i][2].vel() };
115 double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
118 double acc_mag = rsqrt(r2) / r2;
121 for(
int c = 0;
c < 3;
c ++)
123 shared[ij][
c].acc() = dx[
c]* acc_mag;
124 shared[ij][
c].jerk() = (dv[
c] - dx[
c] * jerk_mag ) * acc_mag;
138 GENERIC double sum_acc_planets(
int b,
int c)
const{
142 for(
int d = 0; d < pair_count; d++){
143 int x = first<nbod>(d), y= second<nbod>(d);
147 acc_sum += shared[d][
c].acc() * sys[y].mass();
150 acc_sum -= shared[d][
c].acc() * sys[
x].mass();
165 GENERIC double sum_acc(
int b,
int c)
const{
166 double acc_from_planets = 0;
167 double acc_from_sun = 0;
171 for(
int d = 0; d < pair_count; d++){
172 int x = first<nbod>(d), y= second<nbod>(d);
176 acc_from_sun += shared[d][
c].acc() * sys[y].mass();
178 acc_from_planets += shared[d][
c].acc() * sys[y].mass();
181 acc_from_sun -= shared[d][
c].acc() * sys[
x].mass();
183 acc_from_planets -= shared[d][
c].acc() * sys[
x].mass();
187 return acc_from_sun + acc_from_planets;
190 GENERIC void sum(
int b,
int c,
double&
acc,
double & jerk)
const
192 { sum_works(b,c,
acc,jerk); }
203 GENERIC void sum_test(
int b,
int c,
double&
acc,
double & jerk)
const{
205 double accs[2] = {0.0, 0.0};
207 double jerks[2] = {0.0, 0.0};
210 for(
int d = 0; d < pair_count; d++){
211 int x = first<nbod>(d), y= second<nbod>(d);
212 if( (x==b) || (y==b) )
218 mass = sys[y].mass();
219 dest = (y==0) ? 0 : 1;
223 mass = -sys[
x].mass();
224 dest = (x==0) ? 0 : 1;
227 accs[dest] += shared[d][
c].acc() * mass;
228 jerks[dest] += shared[d][
c].jerk() * mass;
231 acc = accs[0] + accs[1];
232 jerk = jerks[0] + jerks[1];
243 GENERIC void sum_works(
int b,
int c,
double& acc,
double & jerk)
const{
245 double acc_from_planets = 0.0;
247 double acc_from_sun = 0.0;
249 double jerk_from_planets = 0.0;
251 double jerk_from_sun = 0.0;
255 for(
int d = 0; d < pair_count; d++){
256 int x = first<nbod>(d), y= second<nbod>(d);
260 acc_from_sun += shared[d][
c].acc() * sys[y].mass();
261 jerk_from_sun += shared[d][
c].jerk() * sys[y].mass();
263 acc_from_planets += shared[d][
c].acc() * sys[y].mass();
264 jerk_from_planets += shared[d][
c].jerk() * sys[y].mass();
268 acc_from_sun -= shared[d][
c].acc() * sys[
x].mass();
269 jerk_from_sun -= shared[d][
c].jerk() * sys[
x].mass();
271 acc_from_planets -= shared[d][
c].acc() * sys[
x].mass();
272 jerk_from_planets -= shared[d][
c].jerk() * sys[
x].mass();
277 acc = acc_from_sun + acc_from_planets;
278 jerk = jerk_from_sun + jerk_from_planets;
300 GPUAPI
void operator() (
int ij,
int b,
int c,
double& pos,
double& vel,
double& acc,
double& jerk)
const{
302 if(b < nbod && c < 3)
303 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
310 if(b < nbod && c < 3){
321 if(b < nbod && c < 3){
322 return sum_acc_planets(b,c);
328 __device__
double acc (
int ij,
int b,
int c,
double& pos,
double& vel)
const{
330 if(b < nbod && c < 3)
331 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
337 if(b < nbod && c < 3){
345 const int pair_count = nbod * (nbod - 1) / 2;
351 static __device__
void * system_shared_data_pointer(
const int sysid_in_block) {
352 extern __shared__
char shared_mem[];
353 int b = sysid_in_block / CHUNK_SIZE ;
354 int i = sysid_in_block % CHUNK_SIZE ;
355 int idx = i *
sizeof(double)
358 return &shared_mem[idx];
362 static __device__
void * unused_shared_data_pointer(
const int system_per_block) {
363 extern __shared__
char shared_mem[];
365 int b = system_per_block / CHUNK_SIZE ;
366 int i = system_per_block % CHUNK_SIZE ;
369 return &shared_mem[idx];
374 return std::max(nbod * 3, (nbod-1)*nbod/2);
379 return sizeof(shared_data)/CHUNK_SIZE;