28 namespace swarm {
namespace gpu {
namespace bppt {
34 template<
class Monitor ,
template<
class T>
class Gravitation >
37 typedef Monitor monitor_t;
38 typedef typename monitor_t::params mon_params_t;
40 double _time_step_factor, _min_time_step;
41 mon_params_t _mon_params;
45 _time_step_factor = cfg.
require(
"time_step_factor", 0.0);
46 _min_time_step = cfg.
require(
"min_time_step", 0.0);
60 typedef Gravitation<T> Grav;
61 typename Grav::shared_data gravitation;
65 GPUAPI
void convert_internal_to_std_coord() {}
66 GPUAPI
void convert_std_to_internal_coord() {}
77 if( (b < T::n) && (c < 3) ) {
78 shared.gravitation[b][
c].acc() = acc*acc;
79 shared.gravitation[b][
c].jerk() = jerk*jerk;
84 if( (b < T::n) && (c==0) ) {
85 double acc_mag_sq = shared.gravitation[b][0].acc()+shared.gravitation[b][1].acc()+shared.gravitation[b][2].acc();
86 double jerk_mag_sq = shared.gravitation[b][0].jerk()+shared.gravitation[b][1].jerk()+shared.gravitation[b][2].jerk();
87 shared.time_step_factor[b].value() = jerk_mag_sq/acc_mag_sq;
91 double tf = shared.time_step_factor[0].value();
92 for(
int bb=1;bb<T::n;++bb)
93 tf += shared.time_step_factor[bb].value();
94 shared.time_step_factor[0].value() = rsqrt(tf)*_time_step_factor+_min_time_step;
97 return shared.time_step_factor[0].value();
103 __device__
void kernel(T compile_time_param){
107 typedef Gravitation<T> Grav;
111 Grav calcForces(sys, shared_data.gravitation );
114 const int nbod = T::n;
123 monitor_t montest(_mon_params,sys,*
_log) ;
126 double pos = 0.0, vel = 0.0 , acc0 = 0.0, jerk0 = 0.0;
127 if( (b < T::n) && (c < 3) )
128 pos = sys[b][
c].pos() , vel = sys[b][
c].vel();
137 for(
int iter = 0 ; (iter <
_max_iterations) && sys.is_active() ; iter ++ ) {
151 pos = pos + h*(vel+(h*0.5)*(acc0+(h/3.0)*jerk0));
152 vel = vel + h*(acc0+(h*0.5)*jerk0);
154 double pre_pos = pos, pre_vel = vel;
163 pos = pre_pos + (0.1-0.25) * (acc0 - acc1) * h * h - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h * h * h;
164 vel = pre_vel + ( -0.5 ) * (acc0 - acc1 ) * h - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h * h;
166 pos = pre_pos + ( (0.1-0.25) * (acc0 - acc1) - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h) * h * h;
167 vel = pre_vel + (( -0.5 ) * (acc0 - acc1 ) - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h )* h ;
175 pos = pre_pos + (0.1-0.25) * (acc0 - acc1) * h * h - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h * h * h;
176 vel = pre_vel + ( -0.5 ) * (acc0 - acc1 ) * h - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h * h;
178 pos = pre_pos + ((0.1-0.25) * (acc0 - acc1) - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h )* h * h ;
179 vel = pre_vel + (( -0.5 ) * (acc0 - acc1 ) - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h ) * h ;
181 acc0 = acc1, jerk0 = jerk1;
184 if( (b < T::n) && (c < 3) )
185 sys[b][
c].pos() = pos , sys[b][
c].vel() = vel;
193 { sys.set_inactive(); }