From README.md
NuT simulates neutrino transport. Currently, it runs in 1D, spherical and 3D Cartesian geometries (the mesh geometry and topology is carefully encapsulated, so adding another mesh requires a few hundred lines of interface implementation). Particles events are tallied in the lab frame, while scattering events (cross sections) are calculated in the co-moving frame.
Build and Dependency Information
Nut builds a library using cmake. It depends on the Random123 library.
NuT includes a small application to demonstrate the library, called bh-3. “bh” is for black hole—the application models the neutrino transport given a snapshot of the material state of a star collapsing to a black hole.
Analysis
Build and Run Information
Compiler = icpc (ICC) 18.0.1 20171018
Run Parameters = -n 114688 -c 2048 -I ../../test/data/p.2
Scaling
Hit Locations
Intel Software Development Emulator
Intel SDE |
NuT |
Arithmetric Intensity |
0.00924 |
FLOPS per Inst |
0.0363 |
FLOPS per FP Inst |
1.0 |
Bytes per Load Inst |
6.76 |
Bytes per Store Inst |
6.73 |
UOPS Executed
Experiment Aggregate Metrics
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (100.0%) |
1.68 |
0.52 |
0.51 |
0.06% |
0.43% |
11.17% |
0.31% |
0.01% |
0.01% |
56 (100.0%) |
1.66 |
0.50 |
0.50 |
0.06% |
2.00% |
29.86% |
0.16% |
0.02% |
0.11% |
112 (100.0%) |
1.67 |
0.49 |
0.49 |
0.06% |
2.31% |
31.13% |
0.14% |
0.02% |
0.10% |
decide_event
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (57.3%) |
1.65 |
0.52 |
0.52 |
0.09% |
0.26% |
12.33% |
0.39% |
0.01% |
0.01% |
56 (29.5%) |
1.67 |
0.51 |
0.51 |
0.09% |
1.35% |
26.93% |
0.38% |
0.04% |
0.15% |
112 (22.3%) |
1.68 |
0.51 |
0.51 |
0.09% |
1.55% |
28.60% |
0.39% |
0.05% |
0.17% |
29 template <typename particle_t, typename mesh_t, typename opacity_t,
30 typename velocity_t>
31 event_n_dist
32 decide_event( particle_t & p, // non-const b/c of RNG
33 mesh_t const & mesh,
34 opacity_t const & opacity,
35 velocity_t const & velocity)
36 {
37 static const size_t dim(particle_t::dim);
38
39 typedef typename particle_t::fp_t fp_t;
40 // Currently there are always three top-level events considered.
41 // Changing vector to std::array
42 typedef std::array<event_n_dist,3> vend_t;
43 typedef typename mesh_t::d_to_b_t d_to_b_t;
44
45 vend_t e_n_ds;
46
47 cell_t const cell = p.cell;
48 fp_t const tleft = p.t;
49 vec_t<dim> const x = p.x;
50 Species const species = p.species;
51
52 // compute distance to events, push onto vector
53 // compute cross-section in comoving frame
54
55 vec_t<dim> v = velocity.v(cell);
56 // TO DO vector velocity
57 vec_t<dim> vtmp(v);
58 geom_t const eli = p.e;
59 vec_t<dim> const oli = p.omega;
60 // LT to comoving frame (compute interaction comoving).
61 EandOmega<dim> eno_cmi = mesh_t::LT_to_comoving(vtmp,eli,oli);
62 geom_t const eci = eno_cmi.first;
63
64 fp_t const sig_coll = opacity.sigma_collide(cell,eci,species);
65
66 fp_t const random_dev = p.rng.random();
67 /* fp_t const ignored =*/ p.rng.random(); // to keep pace with McPhD
68
69 geom_t const d_coll = (sig_coll != fp_t(0)) ?
70 -std::log(random_dev)/sig_coll : huge;
71
72 e_n_ds[0] = event_n_dist(Event::collision,d_coll);
73
74 d_to_b_t dnf = mesh.distance_to_bdy(x,oli,cell);
75 geom_t const d_bdy = dnf.d;
76 e_n_ds[1] = event_n_dist(Event::boundary,d_bdy);
77
78 geom_t const d_step_end = c * tleft;
79 e_n_ds[2] = event_n_dist(Event::step_end,d_step_end);
80
81 // pick (event,dist) pair with shortest distance
82 event_n_dist closest = *(std::min_element(e_n_ds.begin(),e_n_ds.end(),
83 pair_min_2nd<event_n_dist>));
84
85 // if needed, further resolve events
86 if(closest.first == Event::collision)
87 {
88 // need to compute the comoving energy at the scattering site,
89 // in order to compute the different cross sections there.
90
91 typename mesh_t::coord_t const scat_site(
92 mesh.new_coordinate(x,oli,d_coll));
93 vec_t<dim> const o_sct = scat_site.omega;
94 EandOmega<dim> const eno_scat = mesh_t::LT_to_comoving(vtmp,eli,o_sct);
95 fp_t const ecscat = eno_scat.first;
96 closest.first =
97 decide_scatter_event(p.rng,ecscat,cell,opacity,species);
98 }
99 else
100 {
101 if(closest.first == Event::boundary)
102 {
103 closest.first = decide_boundary_event(mesh,cell,dnf.face);
104 }
105 // compensate RNG if decide_scatter_event not called
106 p.rng.random();
107 }
108 return closest;
109 } // decide_event
apply_event
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (42.1%) |
1.70 |
0.51 |
0.51 |
0.02% |
0.46% |
12.31% |
0.20% |
0.01% |
0.00% |
56 (21.7%) |
1.71 |
0.50 |
0.50 |
0.02% |
2.60% |
29.96% |
0.20% |
0.03% |
0.15% |
112 (16.4%) |
1.73 |
0.50 |
0.50 |
0.02% |
2.40% |
34.00% |
0.22% |
0.04% |
0.16% |
46 template <typename ParticleT, typename MeshT, typename OpacityT,
47 typename VelocityT, typename fp_t>
48 void
49 apply_event(ParticleT & p,
50 Event const & event,
51 geom_t const distance,
52 MeshT const & mesh,
53 OpacityT const & opacity,
54 VelocityT const & velocity,
55 Tally<fp_t> & tally,
56 fp_t const alpha = fp_t(2.0)
57 )
58 {
59 cell_t const index = make_idx(p.cell,opacity.m_n_cells);
60
61 stream_particle(p,distance,mesh);
62
63 tally.accum_pl(distance);
64
65 switch(event)
66 {
67 case Event::nucleon_abs:
68 apply_nucleon_abs(p,tally);
69 break;
70 case Event::nucleon_elastic_scatter:
71 apply_nucleon_elastic_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>( p,
tally,velocity);
72 break;
73 case Event::electron_scatter:
74 {
75 fp_t const ebar = opacity.m_T.T_e_minus[index];
76 fp_t const e_e = ebar;
77 // fp_t const e_e = gen_power_law_energy_alpha2(ebar,p.rng);
78 apply_lepton_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>(p,
tally,e_e,velocity);
79 }
80 break;
81 case Event::positron_scatter:
82 {
83 fp_t const ebar = opacity.m_T.T_e_plus[index];
84 fp_t const e_e = ebar;
85 // fp_t const e_e = gen_power_law_energy_alpha2(ebar,p.rng);
86 apply_lepton_scatter<ParticleT,Tally<fp_t>,VelocityT,MeshT>(p,
tally,e_e,velocity);
87 }
88 break;
89 // case Event::nu_e_annhilation:
90 // break;
91 // case Event::nu_x_annhilation:
92 // break;
93 case Event::cell_low_x_boundary:
94 apply_low_x_boundary(p,tally);
95 break;
96 case Event::cell_high_x_boundary:
97 apply_hi_x_boundary(p,tally);
98 break;
99 case Event::escape:
100 apply_escape(p,tally);
101 break;
102 case Event::reflect:
103 apply_reflect(p,tally);
104 break;
105 case Event::step_end:
106 apply_step_end(p,tally);
107 break;
108 // error if we get to an unresolved collision or boundary
109 case Event::undetermined:
110 p.kill(Fates::UNDETERMINED_EVENT);
111 break;
112 case Event::boundary: // fall through to next
113 case Event::collision: //
114 // underresolved_event(event);
115 // break;
116 default:
117 p.kill(Fates::UNHANDLED_EVENT);
118 } // switch
119 if(event != Event::nucleon_elastic_scatter
120 && event != Event::electron_scatter
121 && event != Event::positron_scatter)
122 {
123 p.rng.random(); // compensate to maintain constant RNs/MC step
124 }
125 return;
126 } // apply_event
random
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (26.7%) |
2.68 |
0.71 |
0.71 |
0.02% |
0.57% |
16.06% |
0.17% |
0.01% |
0.00% |
56 (22.8%) |
2.67 |
0.70 |
0.70 |
0.02% |
2.42% |
30.93% |
0.11% |
0.02% |
0.09% |
112 (13.9%) |
2.65 |
0.69 |
0.69 |
0.02% |
2.71% |
32.31% |
0.15% |
0.03% |
0.12% |
39 double random() {
40 double ret = m_bank[m_idx];
41 if(m_idx == 0) m_idx++;
42 else
43 {
44 m_idx = 0;
45 this->advance();
46 }
47 return ret;
48 }
stream_particle
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (26.4%) |
1.22 |
0.40 |
0.40 |
0.02% |
0.52% |
9.59% |
0.15% |
0.01% |
0.00% |
56 (13.6%) |
1.23 |
0.40 |
0.40 |
0.02% |
2.75% |
27.17% |
0.16% |
0.03% |
0.14% |
112 (10.3%) |
1.25 |
0.40 |
0.40 |
0.02% |
2.61% |
30.30% |
0.17% |
0.03% |
0.14% |
35 template <typename Mesh_T, typename Particle_T, typename fp_t>
36 void stream_particle(Particle_T & p, fp_t const d, Mesh_T const & mesh)
37 {
38 typename Mesh_T::coord_t newcoord =
39 mesh.new_coordinate(p.x,p.omega,d);
40 p.x = newcoord.x;
41 p.omega = newcoord.omega;
42 p.t = p.t - d / c;
43 return;
44 } // stream_particle
Opacity::sigma_collide
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (10.1%) |
1.81 |
0.59 |
0.59 |
0.26% |
0.12% |
6.44% |
0.88% |
0.01% |
0.01% |
56 (5.3%) |
1.81 |
0.58 |
0.58 |
0.26% |
0.91% |
25.69% |
0.81% |
0.06% |
0.19% |
112 (4.0%) |
1.85 |
0.58 |
0.58 |
0.26% |
1.19% |
31.74% |
0.83% |
0.08% |
0.22% |
46 /* total collision opacity */
47 fp_t sigma_collide( cell_t const & cell,
48 fp_t const e_nu,
49 Species const nu_species) const {
50 cellOK(cell,m_n_cells);
51 nrgOK(e_nu);
52 fp_t s_coll =
53 sigma_N_total(cell, e_nu) +
54 sigma_lepton(cell,e_nu, nu_species);
55 return s_coll;
56 } // sigma_collide
Sphere_1D::distance_to_bdy
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (9.5%) |
1.29 |
0.46 |
0.46 |
0.12% |
0.10% |
10.20% |
0.83% |
0.01% |
0.00% |
56 (4.8%) |
1.34 |
0.47 |
0.46 |
0.12% |
0.78% |
26.17% |
0.81% |
0.05% |
0.15% |
112 (3.7%) |
1.34 |
0.46 |
0.45 |
0.12% |
0.81% |
35.02% |
0.80% |
0.05% |
0.15% |
175 d_to_b_t
176 distance_to_bdy(vec_t<dim> const x, vec_t<dim> const omega,
177 cell_t const cell) const noexcept
178 {
179 // cellOK(cell);
180 extents_t extents = this->cell_extents(cell);
181 return dist_to_bdy_impl(x.v[0],omega.v[0],extents.first,extents.second);
182 } // distance_to_bdy
{...}
185 static
186 d_to_b_t
187 dist_to_bdy_impl(geom_t const x, geom_t const omega,
188 geom_t const rlo, geom_t const rhi) noexcept
189 {
190 geom_t const rhisq = rhi * rhi;
191 geom_t const rlosq = rlo * rlo;
192 geom_t const xsq = x * x;
193
194 // need to (1-2) check for intersection with the two spheres that
195 // bound the current cell, and (3) choose the first intersection
196 // along the direction of travel.
197
198 // 1. Compute intersections with outer sphere
199
200 // get Tan(theta) from inverting omega=Cos(theta). We work with a
201 // reduced polar angle, "abs_theta", in the range 0 <= theta <= pi/2,
202 // then select the correct intercept between the line that the
203 // particle travels and the spherical shell using the full value of
204 // theta.
205 geom_t const theta = std::acos(omega);
206 geom_t const abs_theta = (theta > pi/2) ? pi - theta : theta;
207 geom_t const t = std::tan(abs_theta);
208 geom_t const tsq = t*t;
209 geom_t const tcub = tsq * t;
210 geom_t const one_plus_tsq = 1 + tsq;
211 geom_t const one_on_one_plus_tsq = 1.0/one_plus_tsq;
212 // the determinant is r^2 + r^2 * t^2 - x^2 * t^2. Include the
213 // square root and divide by 1+Tan(theta)^2 for convenience here.
214 geom_t const dethi =
215 std::sqrt( rhisq * one_plus_tsq - xsq * tsq) * one_on_one_plus_tsq;
216 // These parts don't depend on the radius of the sphere.
217 geom_t const xterm1 = x*tsq*one_on_one_plus_tsq;
218 geom_t const yterm1 = -x * t + x * tcub * one_on_one_plus_tsq;
219 // These are the two intersection with the outer sphere
220 geom_t const xhip = xterm1 + dethi;
221 geom_t const yhip = yterm1 + t * dethi;
222 geom_t const xhim = xterm1 - dethi;
223 geom_t const yhim = yterm1 - t * dethi;
224 // if the polar angle is less than pi/2, we want the solution
225 // that adds the determinant.
226 geom_t const xhi = (theta < pi/2) ? xhip : xhim;
227 geom_t const yhi = (theta < pi/2) ? yhip : yhim;
228 geom_t const d_hi = std::sqrt( (xhi-x)*(xhi-x) + (yhi*yhi));
229
230 // 2. Look for intersection with inner sphere
231 // There is if the absolute value of the polar angle is
232 // less than atan(1/(b^2 -1)), where b = x/r_lo. In this case, we can
233 // just use the "plus" solution, since that's always the closest.
234 // Note that you may want to know a real solution exists before
235 // computing it--complicates branch removal.
236 geom_t d_lo = huge;
237 if(rlo > 0.0)
238 {
239 geom_t const b = x/rlo;
240 // if the particle is on the inner sphere, and headed inward...
241 if(soft_equiv(b,1.0,1e-11))
242 {
243 if(theta > pi/2)
244 {
245 d_lo = 0.0;
246 }
247 }
248 else
249 {
250 geom_t const tan_theta_lim = std::sqrt(1/(b*b - 1));
251 geom_t const theta_lim = std::atan(tan_theta_lim);
252 if(abs_theta <= theta_lim and theta > pi/2)
253 {
254 geom_t const detlo =
255 std::sqrt( rlosq * one_plus_tsq - xsq * tsq)
256 * one_on_one_plus_tsq;
257 geom_t const xlo = xterm1 + detlo;
258 geom_t const ylo = yterm1 + t * detlo;
259 d_lo = std::sqrt( (xlo-x)*(xlo-x) + ylo*ylo);
260 }
261 }
262 }
263 // now select the shorter distance and the corresponding face
264 geom_t d_bdy = huge;
265 cell_t face = -1;
266 if( d_hi < d_lo)
267 {
268 d_bdy = d_hi;
269 face = 1; // will intersect outer sphere
270 }
271 else
272 {
273 d_bdy = d_lo;
274 face = 0; // will intersect inner sphere
275 }
276 d_to_b_t d2b;
277 d2b.d = d_bdy;
278 d2b.face = face;
279 return d2b;
280 } // dist_to_bdy_impl
Sphere_1D::LT_to_comoving
Threads (Time) |
IPC per Core |
Loads per Cycle |
L1 Hits per Cycle |
L1 Miss Ratio |
L2 Miss Ratio |
L3 Miss Ratio |
L2 B/W Utilized |
L3 B/W Utilized |
DRAM B/W Utilized |
1 (8.4%) |
1.11 |
0.38 |
0.38 |
0.01% |
0.77% |
80.92% |
0.07% |
0.01% |
0.01% |
56 (4.3%) |
1.12 |
0.37 |
0.38 |
0.01% |
4.09% |
25.08% |
0.09% |
0.03% |
0.13% |
112 (3.3%) |
1.15 |
0.37 |
0.36 |
0.01% |
3.98% |
23.53% |
0.11% |
0.03% |
0.15% |
283 static
284 inline
285 EandOmega<1>
286 LT_to_comoving(vec_t<1> const v_lab,
287 geom_t const & e_lab,
288 vec_t<1> const & omega_lab) noexcept
289 {
290 return spec_1D::LT_to_comoving_sphere1D(v_lab.v[0],e_lab,omega_lab);
291 } // LT_to_comoving
35 EandOmega<1>
36 inline
37 LT_to_comoving_sphere1D(geom_t const v_lab,
38 geom_t const e_lab,
39 vec_t<1> const omega_lab) noexcept
40 {
41 geom_t const voc = v_lab/c;
42 geom_t const onemovoc = (1.0 - omega_lab.v[0] * voc);
43 geom_t const e_com = e_lab * gamma(v_lab) * onemovoc;
44 vec_t<1> omega_com;
45 omega_com.v[0] = (omega_lab.v[0] - voc) / onemovoc;
46 return EandOmega<1>(e_com,omega_com);
47 } // LT_to_comoving_sphere1D