Skip to content

Commit 32723a7

Browse files
committed
use symmetry for storage on innermost loop (but not outermost)
1 parent 262dcbb commit 32723a7

File tree

2 files changed

+230
-8
lines changed

2 files changed

+230
-8
lines changed

src/apps/EDGE3D.hpp

Lines changed: 197 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
/// double z[NB] = {z0[i],z1[i],z2[i],z3[i],z4[i],z5[i],z6[i],z7[i]};
2121
/// double edge_matrix[EB][EB];
2222
///
23-
// Get integration points and weights
23+
/// // Get integration points and weights
2424
/// double qpts_1d[MAX_QUAD_ORDER];
2525
/// double wgts_1d[MAX_QUAD_ORDER];
2626
///
2727
/// get_quadrature_rule(quad_type, quad_order, qpts_1d, wgts_1d);
2828
///
29-
// Compute cell centered Jacobian
29+
/// // Compute cell centered Jacobian
3030
/// const double jxx_cc = Jxx(x, y, z, 0.25, 0.25, 0.25, 0.25);
3131
/// const double jxy_cc = Jxy(x, y, z, 0.25, 0.25, 0.25, 0.25);
3232
/// const double jxz_cc = Jxz(x, y, z, 0.25, 0.25, 0.25, 0.25);
@@ -37,20 +37,20 @@
3737
/// const double jzy_cc = Jzy(x, y, z, 0.25, 0.25, 0.25, 0.25);
3838
/// const double jzz_cc = Jzz(x, y, z, 0.25, 0.25, 0.25, 0.25);
3939
///
40-
// Compute cell centered Jacobian determinant
40+
/// // Compute cell centered Jacobian determinant
4141
/// const double detj_cc = compute_detj(
4242
/// jxx_cc, jxy_cc, jxz_cc,
4343
/// jyx_cc, jyy_cc, jyz_cc,
4444
/// jzx_cc, jzy_cc, jzz_cc);
4545
///
46-
// Initialize the stiffness matrix
46+
/// // Initialize the stiffness matrix
4747
/// for (int m = 0; m < EB; m++) {
4848
/// for (int p = m; p < EB; p++) {
4949
/// matrix[m][p] = 0.0;
5050
/// }
5151
/// }
5252
///
53-
// Compute values at each quadrature point
53+
/// // Compute values at each quadrature point
5454
/// for ( int i = 0; i < quad_order; i++ ) {
5555
///
5656
/// const double xloc = qpts_1d[i];
@@ -80,7 +80,7 @@
8080
/// double dbasisy[EB] = {0};
8181
/// curl_edgebasis_y(dbasisy, tmpy, yloc);
8282
///
83-
// Differeniate basis with respect to z at this quadrature point
83+
/// // Differeniate basis with respect to z at this quadrature point
8484
///
8585
/// for ( int k = 0; k < quad_order; k++ ) {
8686
///
@@ -158,14 +158,14 @@
158158
/// dbasisx, dbasisy, dbasisz,
159159
/// tdbasisx, tdbasisy, tdbasisz);
160160
///
161-
// the inner product: alpha*<w_i, w_j>
161+
/// // the inner product: alpha*<w_i, w_j>
162162
/// inner_product(
163163
/// detjwgts*alpha,
164164
/// tebasisx, tebasisy, tebasisz,
165165
/// tebasisx, tebasisy, tebasisz,
166166
/// matrix, true);
167167
///
168-
// the inner product: beta*<Curl(w_i), Curl(w_j)>
168+
/// // the inner product: beta*<Curl(w_i), Curl(w_j)>
169169
/// inner_product(
170170
/// detjwgts*beta,
171171
/// tdbasisx, tdbasisy, tdbasisz,
@@ -360,6 +360,195 @@ RAJA_INLINE void edge_MpSmatrix(
360360
}
361361
}
362362

363+
RAJA_HOST_DEVICE
364+
RAJA_INLINE void symmetric_edge_MpSmatrix(
365+
const rajaperf::Real_type (&x)[NB],
366+
const rajaperf::Real_type (&y)[NB],
367+
const rajaperf::Real_type (&z)[NB],
368+
rajaperf::Real_type alpha,
369+
rajaperf::Real_type beta,
370+
const rajaperf::Real_type detj_tol,
371+
const rajaperf::Int_type quad_type,
372+
const rajaperf::Int_type quad_order,
373+
rajaperf::Real_type (&matrix)[EB][EB])
374+
{
375+
// Get integration points and weights
376+
rajaperf::Real_type qpts_1d[MAX_QUAD_ORDER];
377+
rajaperf::Real_type wgts_1d[MAX_QUAD_ORDER];
378+
379+
get_quadrature_rule(quad_type, quad_order, qpts_1d, wgts_1d);
380+
381+
// Compute cell centered Jacobian
382+
const rajaperf::Real_type jxx_cc = Jxx(x, y, z, 0.25, 0.25, 0.25, 0.25);
383+
const rajaperf::Real_type jxy_cc = Jxy(x, y, z, 0.25, 0.25, 0.25, 0.25);
384+
const rajaperf::Real_type jxz_cc = Jxz(x, y, z, 0.25, 0.25, 0.25, 0.25);
385+
const rajaperf::Real_type jyx_cc = Jyx(x, y, z, 0.25, 0.25, 0.25, 0.25);
386+
const rajaperf::Real_type jyy_cc = Jyy(x, y, z, 0.25, 0.25, 0.25, 0.25);
387+
const rajaperf::Real_type jyz_cc = Jyz(x, y, z, 0.25, 0.25, 0.25, 0.25);
388+
const rajaperf::Real_type jzx_cc = Jzx(x, y, z, 0.25, 0.25, 0.25, 0.25);
389+
const rajaperf::Real_type jzy_cc = Jzy(x, y, z, 0.25, 0.25, 0.25, 0.25);
390+
const rajaperf::Real_type jzz_cc = Jzz(x, y, z, 0.25, 0.25, 0.25, 0.25);
391+
392+
// Compute cell centered Jacobian determinant
393+
const rajaperf::Real_type detj_cc = compute_detj(
394+
jxx_cc, jxy_cc, jxz_cc,
395+
jyx_cc, jyy_cc, jyz_cc,
396+
jzx_cc, jzy_cc, jzz_cc);
397+
398+
// Initialize the stiffness matrix
399+
for (rajaperf::Int_type m = 0; m < EB; m++) {
400+
for (rajaperf::Int_type p = m; p < EB; p++) {
401+
matrix[m][p] = 0.0;
402+
}
403+
}
404+
405+
constexpr rajaperf::Int_type symmetric_size = (EB*(EB+1))/2;
406+
rajaperf::Real_type symmetric_matrix[symmetric_size];
407+
408+
// Initialize the symmetric stiffness matrix
409+
for (rajaperf::Int_type m = 0; m < symmetric_size; m++) {
410+
symmetric_matrix[m] = 0.0;
411+
}
412+
413+
// Compute values at each quadrature point
414+
for ( rajaperf::Int_type i = 0; i < quad_order; i++ ) {
415+
416+
const rajaperf::Real_type xloc = qpts_1d[i];
417+
const rajaperf::Real_type tmpx = 1. - xloc;
418+
419+
rajaperf::Real_type dbasisx[EB] = {0};
420+
curl_edgebasis_x(dbasisx, tmpx, xloc);
421+
422+
for ( rajaperf::Int_type j = 0; j < quad_order; j++ ) {
423+
424+
const rajaperf::Real_type yloc = qpts_1d[j];
425+
const rajaperf::Real_type wgtxy = wgts_1d[i]*wgts_1d[j];
426+
const rajaperf::Real_type tmpy = 1. - yloc;
427+
428+
rajaperf::Real_type tmpxy = tmpx*tmpy;
429+
rajaperf::Real_type xyloc = xloc*yloc;
430+
rajaperf::Real_type tmpxyloc = tmpx*yloc;
431+
rajaperf::Real_type xloctmpy = xloc*tmpy;
432+
433+
const rajaperf::Real_type jzx = Jzx(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc);
434+
const rajaperf::Real_type jzy = Jzy(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc);
435+
const rajaperf::Real_type jzz = Jzz(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc);
436+
437+
rajaperf::Real_type ebasisz[EB] = {0};
438+
edgebasis_z(ebasisz, tmpxy, xloctmpy, xyloc, tmpxyloc);
439+
440+
rajaperf::Real_type dbasisy[EB] = {0};
441+
curl_edgebasis_y(dbasisy, tmpy, yloc);
442+
443+
// Differeniate basis with respect to z at this quadrature point
444+
445+
for ( rajaperf::Int_type k = 0; k < quad_order; k++ ) {
446+
447+
const rajaperf::Real_type zloc = qpts_1d[k];
448+
const rajaperf::Real_type wgts = wgtxy*wgts_1d[k];
449+
const rajaperf::Real_type tmpz = 1. - zloc;
450+
451+
const rajaperf::Real_type tmpxz = tmpx*tmpz;
452+
const rajaperf::Real_type tmpyz = tmpy*tmpz;
453+
454+
const rajaperf::Real_type xzloc = xloc*zloc;
455+
const rajaperf::Real_type yzloc = yloc*zloc;
456+
457+
const rajaperf::Real_type tmpyzloc = tmpy*zloc;
458+
const rajaperf::Real_type tmpxzloc = tmpx*zloc;
459+
460+
const rajaperf::Real_type yloctmpz = yloc*tmpz;
461+
const rajaperf::Real_type xloctmpz = xloc*tmpz;
462+
463+
const rajaperf::Real_type jxx = Jxx(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc);
464+
const rajaperf::Real_type jxy = Jxy(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc);
465+
const rajaperf::Real_type jxz = Jxz(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc);
466+
const rajaperf::Real_type jyx = Jyx(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc);
467+
const rajaperf::Real_type jyy = Jyy(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc);
468+
const rajaperf::Real_type jyz = Jyz(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc);
469+
470+
rajaperf::Real_type jinvxx, jinvxy, jinvxz,
471+
jinvyx, jinvyy, jinvyz,
472+
jinvzx, jinvzy, jinvzz,
473+
detj_unfixed, detj, abs_detj, invdetj;
474+
475+
jacobian_inv(
476+
jxx, jxy, jxz,
477+
jyx, jyy, jyz,
478+
jzx, jzy, jzz,
479+
detj_cc, detj_tol,
480+
jinvxx, jinvxy, jinvxz,
481+
jinvyx, jinvyy, jinvyz,
482+
jinvzx, jinvzy, jinvzz,
483+
detj_unfixed, detj, abs_detj, invdetj);
484+
485+
const rajaperf::Real_type detjwgts = wgts*abs_detj;
486+
487+
rajaperf::Real_type ebasisx[EB] = {0};
488+
edgebasis_x(ebasisx, tmpyz, yloctmpz, tmpyzloc, yzloc);
489+
490+
rajaperf::Real_type ebasisy[EB] = {0};
491+
edgebasis_y(ebasisy, tmpxz, xloctmpz, tmpxzloc, xzloc);
492+
493+
rajaperf::Real_type dbasisz[EB] = {0};
494+
curl_edgebasis_z(dbasisz, tmpz, zloc);
495+
496+
const rajaperf::Real_type inv_abs_detj = 1./(abs_detj+ptiny);
497+
498+
rajaperf::Real_type tebasisx[EB] = {0};
499+
rajaperf::Real_type tebasisy[EB] = {0};
500+
rajaperf::Real_type tebasisz[EB] = {0};
501+
502+
transform_edge_basis(
503+
jinvxx, jinvxy, jinvxz,
504+
jinvyx, jinvyy, jinvyz,
505+
jinvzx, jinvzy, jinvzz,
506+
ebasisx, ebasisy, ebasisz,
507+
tebasisx, tebasisy, tebasisz);
508+
509+
rajaperf::Real_type tdbasisx[EB] = {0};
510+
rajaperf::Real_type tdbasisy[EB] = {0};
511+
rajaperf::Real_type tdbasisz[EB] = {0};
512+
513+
transform_curl_edge_basis(
514+
jxx, jxy, jxz,
515+
jyx, jyy, jyz,
516+
jzx, jzy, jzz,
517+
inv_abs_detj,
518+
dbasisx, dbasisy, dbasisz,
519+
tdbasisx, tdbasisy, tdbasisz);
520+
521+
// the inner product: alpha*<w_i, w_j>
522+
inner_product(
523+
detjwgts*alpha,
524+
tebasisx, tebasisy, tebasisz,
525+
tebasisx, tebasisy, tebasisz,
526+
symmetric_matrix);
527+
528+
// the inner product: beta*<Curl(w_i), Curl(w_j)>
529+
inner_product(
530+
detjwgts*beta,
531+
tdbasisx, tdbasisy, tdbasisz,
532+
tdbasisx, tdbasisy, tdbasisz,
533+
symmetric_matrix);
534+
}
535+
}
536+
}
537+
538+
// write back to matrix
539+
rajaperf::Int_type offset = 0;
540+
for (rajaperf::Int_type p = 0; p < M; p++) {
541+
542+
matrix[p][p] = symmetric_matrix[offset];
543+
for (rajaperf::Int_type m = 1; m < (M-p); m++) {
544+
auto x = symmetric_matrix[offset + m];
545+
matrix[p][m] = x;
546+
matrix[m][p] = x;
547+
}
548+
offset += (M-p);
549+
}
550+
}
551+
363552
#define EDGE3D_DATA_SETUP \
364553
Real_ptr x = m_x; \
365554
Real_ptr y = m_y; \

src/apps/mixed_fem_helper.hpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,39 @@ constexpr void inner_product(
309309
}
310310
}
311311

312+
template<rajaperf::Int_type M>
313+
RAJA_HOST_DEVICE
314+
constexpr void inner_product(
315+
const rajaperf::Real_type weight,
316+
const rajaperf::Real_type (&basis_1_x)[M],
317+
const rajaperf::Real_type (&basis_1_y)[M],
318+
const rajaperf::Real_type (&basis_1_z)[M],
319+
const rajaperf::Real_type (&basis_2_x)[M],
320+
const rajaperf::Real_type (&basis_2_y)[M],
321+
const rajaperf::Real_type (&basis_2_z)[M],
322+
rajaperf::Real_type (&matrix)[(M*(M+1))/2])
323+
{
324+
// inner product is <basis_2, basis_1>
325+
rajaperf::Int_type offset = 0;
326+
327+
for (rajaperf::Int_type p = 0; p < M; p++) {
328+
329+
const rajaperf::Real_type txi = basis_2_x[p];
330+
const rajaperf::Real_type tyi = basis_2_y[p];
331+
const rajaperf::Real_type tzi = basis_2_z[p];
332+
333+
for (rajaperf::Int_type m = 0; m < (M-p); m++) {
334+
335+
const rajaperf::Real_type txj = basis_1_x[p+m];
336+
const rajaperf::Real_type tyj = basis_1_y[p+m];
337+
const rajaperf::Real_type tzj = basis_1_z[p+m];
338+
matrix[offset + m] += weight*(txi*txj + tyi*tyj + tzi*tzj);
339+
}
340+
341+
offset += (M-p);
342+
}
343+
}
344+
312345
constexpr rajaperf::Int_type flops_bad_zone_algorithm()
313346
{
314347
return 5;

0 commit comments

Comments
 (0)