NuT – ECP Proxy Applications

From README.md

NuT simulates neutrino transport. Currently, it runs in 1D, spherical and 3D Cartesian geometries (the mesh geometry and topology is carefully encapsulated, so adding another mesh requires a few hundred lines of interface implementation). Particles events are tallied in the lab frame, while scattering events (cross sections) are calculated in the co-moving frame.

Build and Dependency Information

Nut builds a library using cmake. It depends on the Random123 library.

NuT includes a small application to demonstrate the library, called bh-3. “bh” is for black hole—the application models the neutrino transport given a snapshot of the material state of a star collapsing to a black hole.

Analysis

Build and Run Information

Compiler = icpc (ICC) 18.0.1 20171018
Run Parameters = -n 114688 -c 2048 -I ../../test/data/p.2

Scaling

Hit Locations

Intel Software Development Emulator

Intel SDE	NuT
Arithmetric Intensity	0.00924
FLOPS per Inst	0.0363
FLOPS per FP Inst	1.0
Bytes per Load Inst	6.76
Bytes per Store Inst	6.73

UOPS Executed

Experiment Aggregate Metrics

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (100.0%)	1.68	0.52	0.51	0.06%	0.43%	11.17%	0.31%	0.01%	0.01%
56 (100.0%)	1.66	0.50	0.50	0.06%	2.00%	29.86%	0.16%	0.02%	0.11%
112 (100.0%)	1.67	0.49	0.49	0.06%	2.31%	31.13%	0.14%	0.02%	0.10%

`decide_event`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (57.3%)	1.65	0.52	0.52	0.09%	0.26%	12.33%	0.39%	0.01%	0.01%
56 (29.5%)	1.67	0.51	0.51	0.09%	1.35%	26.93%	0.38%	0.04%	0.15%
112 (22.3%)	1.68	0.51	0.51	0.09%	1.55%	28.60%	0.39%	0.05%	0.17%

 29     template <typename particle_t, typename mesh_t, typename opacity_t,
 30               typename velocity_t>
 31     event_n_dist
 32     decide_event( particle_t & p, // non-const b/c of RNG
 33                   mesh_t const & mesh,
 34                   opacity_t const & opacity,
 35                   velocity_t const & velocity)
 36     {
 37         static const size_t dim(particle_t::dim);
 38 
 39         typedef typename particle_t::fp_t fp_t;
 40         // Currently there are always  three top-level events considered.
 41         // Changing vector to std::array
 42         typedef std::array<event_n_dist,3> vend_t;
 43         typedef typename mesh_t::d_to_b_t d_to_b_t;
 44 
 45         vend_t e_n_ds;
 46 
 47         cell_t const cell  = p.cell;
 48         fp_t const tleft   = p.t;
 49         vec_t<dim> const x = p.x;
 50         Species const species = p.species;
 51 
 52         // compute distance to events, push onto vector
 53         // compute cross-section in comoving frame
 54 
 55         vec_t<dim> v = velocity.v(cell);
 56         // TO DO vector velocity
 57         vec_t<dim> vtmp(v);
 58         geom_t const eli  = p.e;
 59         vec_t<dim> const oli  = p.omega;
 60         // LT to comoving frame (compute interaction comoving).
 61         EandOmega<dim> eno_cmi = mesh_t::LT_to_comoving(vtmp,eli,oli);
 62         geom_t const eci  = eno_cmi.first;
 63 
 64         fp_t const sig_coll   = opacity.sigma_collide(cell,eci,species);
 65 
 66         fp_t const random_dev = p.rng.random();
 67         /* fp_t const ignored    =*/ p.rng.random(); // to keep pace with McPhD
 68 
 69         geom_t const d_coll     = (sig_coll != fp_t(0)) ?
 70             -std::log(random_dev)/sig_coll : huge;
 71 
 72         e_n_ds[0] = event_n_dist(Event::collision,d_coll);
 73 
 74         d_to_b_t dnf = mesh.distance_to_bdy(x,oli,cell);
 75         geom_t const d_bdy = dnf.d;
 76         e_n_ds[1] = event_n_dist(Event::boundary,d_bdy);
 77 
 78         geom_t const d_step_end = c * tleft;
 79         e_n_ds[2] = event_n_dist(Event::step_end,d_step_end);
 80 
 81         // pick (event,dist) pair with shortest distance
 82         event_n_dist closest = *(std::min_element(e_n_ds.begin(),e_n_ds.end(),
 83                                                   pair_min_2nd<event_n_dist>));
 84 
 85         // if needed, further resolve events
 86         if(closest.first == Event::collision)
 87         {
 88             // need to compute the comoving energy at the scattering site,
 89             // in order to compute the different cross sections there.
 90 
 91             typename mesh_t::coord_t const scat_site(
 92                 mesh.new_coordinate(x,oli,d_coll));
 93             vec_t<dim> const o_sct = scat_site.omega;
 94             EandOmega<dim> const eno_scat = mesh_t::LT_to_comoving(vtmp,eli,o_sct);
 95             fp_t const ecscat = eno_scat.first;
 96             closest.first =
 97                 decide_scatter_event(p.rng,ecscat,cell,opacity,species);
 98         }
 99         else
100         {
101             if(closest.first == Event::boundary)
102             {
103                 closest.first = decide_boundary_event(mesh,cell,dnf.face);
104             }
105             // compensate RNG if decide_scatter_event not called
106             p.rng.random();
107         }
108         return closest;
109     } // decide_event

`apply_event`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (42.1%)	1.70	0.51	0.51	0.02%	0.46%	12.31%	0.20%	0.01%	0.00%
56 (21.7%)	1.71	0.50	0.50	0.02%	2.60%	29.96%	0.20%	0.03%	0.15%
112 (16.4%)	1.73	0.50	0.50	0.02%	2.40%	34.00%	0.22%	0.04%	0.16%

 46     template <typename ParticleT, typename MeshT, typename OpacityT,
 47               typename VelocityT, typename fp_t>
 48     void
 49     apply_event(ParticleT & p,
 50                 Event const & event,
 51                 geom_t const distance,
 52                 MeshT const & mesh,
 53                 OpacityT const & opacity,
 54                 VelocityT const & velocity,
 55                 Tally<fp_t> & tally,
 56                 fp_t const alpha = fp_t(2.0)
 57         )
 58     {
 59         cell_t const index = make_idx(p.cell,opacity.m_n_cells);
 60 
 61         stream_particle(p,distance,mesh);
 62 
 63         tally.accum_pl(distance);
 64 
 65         switch(event)
 66         {
 67         case Event::nucleon_abs:
 68             apply_nucleon_abs(p,tally);
 69             break;
 70         case Event::nucleon_elastic_scatter:
 71             apply_nucleon_elastic_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>( p,
                                                                            tally,velocity);
 72             break;
 73         case Event::electron_scatter:
 74             {
 75                 fp_t const ebar = opacity.m_T.T_e_minus[index];
 76                 fp_t const e_e  = ebar;
 77                 // fp_t const e_e  = gen_power_law_energy_alpha2(ebar,p.rng);
 78                 apply_lepton_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>(p,
                                                                        tally,e_e,velocity);
 79             }
 80             break;
 81         case Event::positron_scatter:
 82             {
 83                 fp_t const ebar = opacity.m_T.T_e_plus[index];
 84                 fp_t const e_e  = ebar;
 85                 // fp_t const e_e  = gen_power_law_energy_alpha2(ebar,p.rng);
 86                 apply_lepton_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>(p,
                                                                        tally,e_e,velocity);
 87             }
 88             break;
 89         // case Event::nu_e_annhilation:
 90         //     break;
 91         // case Event::nu_x_annhilation:
 92         //     break;
 93         case Event::cell_low_x_boundary:
 94             apply_low_x_boundary(p,tally);
 95             break;
 96         case Event::cell_high_x_boundary:
 97             apply_hi_x_boundary(p,tally);
 98             break;
 99         case Event::escape:
100             apply_escape(p,tally);
101             break;
102         case Event::reflect:
103             apply_reflect(p,tally);
104             break;
105         case Event::step_end:
106             apply_step_end(p,tally);
107             break;
108         // error if we get to an unresolved collision or boundary
109         case Event::undetermined:
110             p.kill(Fates::UNDETERMINED_EVENT);
111             break;
112         case Event::boundary:   // fall through to next
113         case Event::collision:  //
114             // underresolved_event(event);
115             // break;
116         default:
117             p.kill(Fates::UNHANDLED_EVENT);
118         } // switch
119         if(event != Event::nucleon_elastic_scatter
120            && event != Event::electron_scatter
121            && event != Event::positron_scatter)
122         {
123             p.rng.random(); // compensate to maintain constant RNs/MC step
124         }
125         return;
126     } // apply_event

`random`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (26.7%)	2.68	0.71	0.71	0.02%	0.57%	16.06%	0.17%	0.01%	0.00%
56 (22.8%)	2.67	0.70	0.70	0.02%	2.42%	30.93%	0.11%	0.02%	0.09%
112 (13.9%)	2.65	0.69	0.69	0.02%	2.71%	32.31%	0.15%	0.03%	0.12%

 39         double random() {
 40             double ret = m_bank[m_idx];
 41             if(m_idx == 0) m_idx++;
 42             else
 43             {
 44                 m_idx = 0;
 45                 this->advance();
 46             }
 47             return ret;
 48         }

`stream_particle`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (26.4%)	1.22	0.40	0.40	0.02%	0.52%	9.59%	0.15%	0.01%	0.00%
56 (13.6%)	1.23	0.40	0.40	0.02%	2.75%	27.17%	0.16%	0.03%	0.14%
112 (10.3%)	1.25	0.40	0.40	0.02%	2.61%	30.30%	0.17%	0.03%	0.14%

 35     template <typename Mesh_T, typename Particle_T, typename fp_t>
 36     void stream_particle(Particle_T & p, fp_t const d, Mesh_T const & mesh)
 37     {
 38         typename Mesh_T::coord_t newcoord =
 39             mesh.new_coordinate(p.x,p.omega,d);
 40         p.x     = newcoord.x;
 41         p.omega = newcoord.omega;
 42         p.t     = p.t - d / c;
 43         return;
 44     } // stream_particle

`Opacity::sigma_collide`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (10.1%)	1.81	0.59	0.59	0.26%	0.12%	6.44%	0.88%	0.01%	0.01%
56 (5.3%)	1.81	0.58	0.58	0.26%	0.91%	25.69%	0.81%	0.06%	0.19%
112 (4.0%)	1.85	0.58	0.58	0.26%	1.19%	31.74%	0.83%	0.08%	0.22%

 46         /* total collision opacity */
 47         fp_t sigma_collide( cell_t const & cell,
 48                             fp_t const e_nu,
 49                             Species const nu_species) const {
 50             cellOK(cell,m_n_cells);
 51             nrgOK(e_nu);
 52             fp_t s_coll =
 53                 sigma_N_total(cell, e_nu) +
 54                 sigma_lepton(cell,e_nu, nu_species);
 55             return s_coll;
 56         } // sigma_collide

`Sphere_1D::distance_to_bdy`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (9.5%)	1.29	0.46	0.46	0.12%	0.10%	10.20%	0.83%	0.01%	0.00%
56 (4.8%)	1.34	0.47	0.46	0.12%	0.78%	26.17%	0.81%	0.05%	0.15%
112 (3.7%)	1.34	0.46	0.45	0.12%	0.81%	35.02%	0.80%	0.05%	0.15%

175         d_to_b_t
176         distance_to_bdy(vec_t<dim> const x, vec_t<dim> const omega,
177                         cell_t const cell) const noexcept
178         {
179             // cellOK(cell);
180             extents_t extents = this->cell_extents(cell);
181             return dist_to_bdy_impl(x.v[0],omega.v[0],extents.first,extents.second);
182         } // distance_to_bdy
{...}
185         static
186         d_to_b_t
187         dist_to_bdy_impl(geom_t const x, geom_t const omega,
188                          geom_t const rlo, geom_t const rhi) noexcept
189         {
190             geom_t const rhisq = rhi * rhi;
191             geom_t const rlosq = rlo * rlo;
192             geom_t const xsq   = x * x;
193 
194             // need to (1-2) check for intersection with the two spheres that
195             // bound the current cell, and (3) choose the first intersection
196             // along the direction of travel.
197 
198             // 1. Compute intersections with outer sphere
199 
200             // get Tan(theta) from inverting omega=Cos(theta). We work with a
201             // reduced polar angle, "abs_theta", in the range 0 <= theta <= pi/2,
202             // then select the correct intercept between the line that the
203             // particle travels and the spherical shell using the full value of
204             // theta.
205             geom_t const theta = std::acos(omega);
206             geom_t const abs_theta = (theta > pi/2) ?  pi - theta : theta;
207             geom_t const t = std::tan(abs_theta);
208             geom_t const tsq = t*t;
209             geom_t const tcub = tsq * t;
210             geom_t const one_plus_tsq = 1 + tsq;
211             geom_t const one_on_one_plus_tsq = 1.0/one_plus_tsq;
212             // the determinant is r^2 + r^2 * t^2 - x^2 * t^2. Include the
213             // square root and divide by 1+Tan(theta)^2 for convenience here.
214             geom_t const dethi =
215                 std::sqrt( rhisq * one_plus_tsq - xsq * tsq) * one_on_one_plus_tsq;
216             // These parts don't depend on the radius of the sphere.
217             geom_t const xterm1 = x*tsq*one_on_one_plus_tsq;
218             geom_t const yterm1 = -x * t + x * tcub * one_on_one_plus_tsq;
219             // These are the two intersection with the outer sphere
220             geom_t const xhip = xterm1 + dethi;
221             geom_t const yhip = yterm1 + t * dethi;
222             geom_t const xhim = xterm1 - dethi;
223             geom_t const yhim = yterm1 - t * dethi;
224             // if the polar angle is less than pi/2, we want the solution
225             // that adds the determinant.
226             geom_t const xhi = (theta < pi/2) ? xhip : xhim;
227             geom_t const yhi = (theta < pi/2) ? yhip : yhim;
228             geom_t const d_hi = std::sqrt( (xhi-x)*(xhi-x) + (yhi*yhi));
229 
230             // 2. Look for intersection with inner sphere
231             // There is if the absolute value of the polar angle is
232             // less than atan(1/(b^2 -1)), where b = x/r_lo. In this case, we can
233             // just use the "plus" solution, since that's always the closest.
234             // Note that you may want to know a real solution exists before
235             // computing it--complicates branch removal.
236             geom_t d_lo = huge;
237             if(rlo > 0.0)
238             {
239                 geom_t const b = x/rlo;
240                 // if the particle is on the inner sphere, and headed inward...
241                 if(soft_equiv(b,1.0,1e-11))
242                 {
243                     if(theta > pi/2)
244                     {
245                         d_lo = 0.0;
246                     }
247                 }
248                 else
249                 {
250                     geom_t const tan_theta_lim = std::sqrt(1/(b*b - 1));
251                     geom_t const theta_lim = std::atan(tan_theta_lim);
252                     if(abs_theta <= theta_lim and theta > pi/2)
253                     {
254                         geom_t const detlo =
255                             std::sqrt( rlosq * one_plus_tsq - xsq * tsq)
256                             * one_on_one_plus_tsq;
257                         geom_t const xlo = xterm1 + detlo;
258                         geom_t const ylo = yterm1 + t * detlo;
259                         d_lo = std::sqrt( (xlo-x)*(xlo-x) + ylo*ylo);
260                     }
261                 }
262             }
263             // now select the shorter distance and the corresponding face
264             geom_t d_bdy = huge;
265             cell_t face  = -1;
266             if( d_hi < d_lo)
267             {
268                 d_bdy = d_hi;
269                 face  = 1;     // will intersect outer sphere
270             }
271             else
272             {
273                 d_bdy = d_lo;
274                 face  = 0;     // will intersect inner sphere
275             }
276             d_to_b_t d2b;
277             d2b.d = d_bdy;
278             d2b.face = face;
279             return d2b;
280         } // dist_to_bdy_impl

`Sphere_1D::LT_to_comoving`

Threads (Time)	IPC per Core	Loads per Cycle	L1 Hits per Cycle	L1 Miss Ratio	L2 Miss Ratio	L3 Miss Ratio	L2 B/W Utilized	L3 B/W Utilized	DRAM B/W Utilized
1 (8.4%)	1.11	0.38	0.38	0.01%	0.77%	80.92%	0.07%	0.01%	0.01%
56 (4.3%)	1.12	0.37	0.38	0.01%	4.09%	25.08%	0.09%	0.03%	0.13%
112 (3.3%)	1.15	0.37	0.36	0.01%	3.98%	23.53%	0.11%	0.03%	0.15%

283         static
284         inline
285         EandOmega<1>
286         LT_to_comoving(vec_t<1> const v_lab,
287             geom_t const & e_lab,
288             vec_t<1> const & omega_lab) noexcept
289         {
290             return spec_1D::LT_to_comoving_sphere1D(v_lab.v[0],e_lab,omega_lab);
291         } // LT_to_comoving

 35         EandOmega<1>
 36         inline
 37         LT_to_comoving_sphere1D(geom_t const v_lab,
 38                                 geom_t const e_lab,
 39                                 vec_t<1> const omega_lab) noexcept
 40         {
 41             geom_t const voc       = v_lab/c;
 42             geom_t const onemovoc  = (1.0 - omega_lab.v[0] * voc);
 43             geom_t const e_com     = e_lab * gamma(v_lab) * onemovoc;
 44             vec_t<1> omega_com;
 45             omega_com.v[0] = (omega_lab.v[0] - voc) / onemovoc;
 46             return EandOmega<1>(e_com,omega_com);
 47         } // LT_to_comoving_sphere1D