30 namespace swarm {
namespace gpu {
namespace bppt {
44 const static int nbod = T::n;
45 const static int pair_count = (nbod*(nbod-1))/2;
46 const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
51 ensemble::SystemRef& sys;
70 GENERIC void calc_pair(
int ij)
const{
71 int i = first<nbod>( ij );
72 int j = second<nbod>( ij );
75 double dx[3] = { sys[j][0].pos()- sys[i][0].pos(),sys[j][1].pos()- sys[i][1].pos(), sys[j][2].pos()- sys[i][2].pos() };
76 double dv[3] = { sys[j][0].vel()- sys[i][0].vel(),sys[j][1].vel()- sys[i][1].vel(), sys[j][2].vel()- sys[i][2].vel() };
77 double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
78 double acc_mag = rsqrt(r2) / r2;
81 for(
int c = 0;
c < 3;
c ++)
83 shared[ij][
c].acc() = dx[
c]* acc_mag;
89 __device__
double one_over_r(
const int b1,
const int b2)
const
93 for(
int d = 0; d < pair_count; d++)
95 int x = first<nbod>(d), y= second<nbod>(d);
96 if( ((x==b1) && (y==b2)) || (x==b2) && (y==b1))
98 sum += shared[d][0].acc()*shared[d][0].acc();
99 sum += shared[d][1].acc()*shared[d][1].acc();
100 sum += shared[d][2].acc()*shared[d][2].acc();
116 GPUAPI
double sum_acc_gr(
int b,
int c)
const
121 for(
int d = 0; d < pair_count; d++)
123 int x = first<nbod>(d), y= second<nbod>(d);
125 if( ((x==b)&&(y==0)) || ((x==0)&&(y==b)) )
127 one_over_r = shared[d][0].acc()*shared[d][0].acc();
128 one_over_r += shared[d][1].acc()*shared[d][1].acc();
129 one_over_r += shared[d][2].acc()*shared[d][2].acc();
132 double v2 = 0., v_dot_r = 0.;
133 double dx = (sys[b][0].pos()-sys[0][0].pos());
134 double dv = (sys[b][0].vel()-sys[0][0].vel());
137 dx = (sys[b][1].pos()-sys[0][1].pos());
138 dv = (sys[b][1].vel()-sys[0][1].vel());
141 dx = (sys[b][2].pos()-sys[0][2].pos());
142 dv = (sys[b][2].vel()-sys[0][2].vel());
146 double GM = sys[0].mass();
147 double f1 = (4*GM*one_over_r-v2);
148 double f2 = 4*v_dot_r;
149 acc_sum = -f1*(sys[b][
c].pos()-sys[0][
c].pos())
150 -f2*(sys[b][c].vel()-sys[0][
c].vel());
151 double f0 = GM*one_over_r*one_over_r*one_over_r/c2;
161 __device__
double sum_acc_j2(
int b,
int c)
const
167 for(
int d = 0; d < pair_count; d++)
169 int x = first<nbod>(d), y= second<nbod>(d);
171 if( ((x==b)&&(y==0)) || ((x==0)&&(y==b)) )
173 one_over_r = shared[d][0].acc()*shared[d][0].acc();
174 one_over_r += shared[d][1].acc()*shared[d][1].acc();
175 one_over_r += shared[d][2].acc()*shared[d][2].acc();
176 u2 = shared[d][2].acc()*shared[d][2].acc() / one_over_r;
179 double dx = (sys[b][
c].pos()-sys[0][
c].pos());
180 double mstar = sys[0].mass();
181 double mpl = sys[b].mass();
182 double j2 = sys[b].attribute(1);
183 double jr2 = j2*one_over_r*one_over_r;
184 double tmp2 = jr2*(7.5*u2-1.5);
185 double tmp3 = jr2*3.;
187 double jr4 = j4*one_over_r*one_over_r*one_over_r*one_over_r;
189 tmp2 += jr4*(39.375*u4 - 26.25*u2 + 1.875);
190 tmp3 += jr4*(17.5*u2-7.5);
193 double jr6 = j6*one_over_r*one_over_r*one_over_r*one_over_r*one_over_r*one_over_r;
195 tmp2 += jr6*(187.6875*u6 -216.5625*u4 +59.0625*u2 -2.1875);
196 tmp3 += jr6*(86.625*u4 - 78.75*u2 + 13.125);
198 if(c==2) tmp2 -= tmp3;
199 double tmp1 = mstar*one_over_r*one_over_r*one_over_r;
200 acc_sum += dx*tmp1*tmp2;
202 double acc_sun_c = -mpl*acc_sum/mstar;
208 GENERIC double sum_acc_planets(
int b,
int c)
const{
213 for(
int d = 0; d < pair_count; d++){
214 int x = first<nbod>(d), y= second<nbod>(d);
218 acc_sum += shared[d][
c].acc() * sys[y].mass();
221 acc_sum -= shared[d][
c].acc() * sys[
x].mass();
228 __device__
double sum_acc(
int b,
int c)
const{
229 double acc_from_planets = 0;
230 double acc_from_sun = 0;
234 for(
int d = 0; d < pair_count; d++){
235 int x = first<nbod>(d), y= second<nbod>(d);
239 acc_from_sun += shared[d][
c].acc() * sys[y].mass();
241 acc_from_planets += shared[d][
c].acc() * sys[y].mass();
244 acc_from_sun -= shared[d][
c].acc() * sys[
x].mass();
246 acc_from_planets -= shared[d][
c].acc() * sys[
x].mass();
250 double acc_gr = sum_acc_gr(b,c);
251 return acc_from_sun + acc_from_planets + acc_gr;
258 if(b < nbod && c < 3)
259 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
264 if(b < nbod && c < 3){
288 if(b < nbod && c < 3){
289 return sum_acc_planets(b,c);
306 GPUAPI
double acc (
int ij,
int b,
int c,
double& pos,
double& vel)
const{
308 if(b < nbod && c < 3)
309 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
314 if(b < nbod && c < 3){
320 static GENERIC int thread_per_system(){
321 return ( 3*nbod > (nbod-1)*nbod/2 ) ? nbod*3 : (nbod-1)*nbod/2;
324 static GENERIC int shmem_per_system() {
325 return sizeof(shared_data)/CHUNK_SIZE;