|
20 | 20 | /// double z[NB] = {z0[i],z1[i],z2[i],z3[i],z4[i],z5[i],z6[i],z7[i]};
|
21 | 21 | /// double edge_matrix[EB][EB];
|
22 | 22 | ///
|
23 |
| - // Get integration points and weights |
| 23 | +/// // Get integration points and weights |
24 | 24 | /// double qpts_1d[MAX_QUAD_ORDER];
|
25 | 25 | /// double wgts_1d[MAX_QUAD_ORDER];
|
26 | 26 | ///
|
27 | 27 | /// get_quadrature_rule(quad_type, quad_order, qpts_1d, wgts_1d);
|
28 | 28 | ///
|
29 |
| - // Compute cell centered Jacobian |
| 29 | +/// // Compute cell centered Jacobian |
30 | 30 | /// const double jxx_cc = Jxx(x, y, z, 0.25, 0.25, 0.25, 0.25);
|
31 | 31 | /// const double jxy_cc = Jxy(x, y, z, 0.25, 0.25, 0.25, 0.25);
|
32 | 32 | /// const double jxz_cc = Jxz(x, y, z, 0.25, 0.25, 0.25, 0.25);
|
|
37 | 37 | /// const double jzy_cc = Jzy(x, y, z, 0.25, 0.25, 0.25, 0.25);
|
38 | 38 | /// const double jzz_cc = Jzz(x, y, z, 0.25, 0.25, 0.25, 0.25);
|
39 | 39 | ///
|
40 |
| - // Compute cell centered Jacobian determinant |
| 40 | +/// // Compute cell centered Jacobian determinant |
41 | 41 | /// const double detj_cc = compute_detj(
|
42 | 42 | /// jxx_cc, jxy_cc, jxz_cc,
|
43 | 43 | /// jyx_cc, jyy_cc, jyz_cc,
|
44 | 44 | /// jzx_cc, jzy_cc, jzz_cc);
|
45 | 45 | ///
|
46 |
| - // Initialize the stiffness matrix |
| 46 | +/// // Initialize the stiffness matrix |
47 | 47 | /// for (int m = 0; m < EB; m++) {
|
48 | 48 | /// for (int p = m; p < EB; p++) {
|
49 | 49 | /// matrix[m][p] = 0.0;
|
50 | 50 | /// }
|
51 | 51 | /// }
|
52 | 52 | ///
|
53 |
| - // Compute values at each quadrature point |
| 53 | +/// // Compute values at each quadrature point |
54 | 54 | /// for ( int i = 0; i < quad_order; i++ ) {
|
55 | 55 | ///
|
56 | 56 | /// const double xloc = qpts_1d[i];
|
|
80 | 80 | /// double dbasisy[EB] = {0};
|
81 | 81 | /// curl_edgebasis_y(dbasisy, tmpy, yloc);
|
82 | 82 | ///
|
83 |
| - // Differeniate basis with respect to z at this quadrature point |
| 83 | +/// // Differeniate basis with respect to z at this quadrature point |
84 | 84 | ///
|
85 | 85 | /// for ( int k = 0; k < quad_order; k++ ) {
|
86 | 86 | ///
|
|
158 | 158 | /// dbasisx, dbasisy, dbasisz,
|
159 | 159 | /// tdbasisx, tdbasisy, tdbasisz);
|
160 | 160 | ///
|
161 |
| - // the inner product: alpha*<w_i, w_j> |
| 161 | +/// // the inner product: alpha*<w_i, w_j> |
162 | 162 | /// inner_product(
|
163 | 163 | /// detjwgts*alpha,
|
164 | 164 | /// tebasisx, tebasisy, tebasisz,
|
165 | 165 | /// tebasisx, tebasisy, tebasisz,
|
166 | 166 | /// matrix, true);
|
167 | 167 | ///
|
168 |
| - // the inner product: beta*<Curl(w_i), Curl(w_j)> |
| 168 | +/// // the inner product: beta*<Curl(w_i), Curl(w_j)> |
169 | 169 | /// inner_product(
|
170 | 170 | /// detjwgts*beta,
|
171 | 171 | /// tdbasisx, tdbasisy, tdbasisz,
|
@@ -360,6 +360,195 @@ RAJA_INLINE void edge_MpSmatrix(
|
360 | 360 | }
|
361 | 361 | }
|
362 | 362 |
|
| 363 | +RAJA_HOST_DEVICE |
| 364 | +RAJA_INLINE void symmetric_edge_MpSmatrix( |
| 365 | + const rajaperf::Real_type (&x)[NB], |
| 366 | + const rajaperf::Real_type (&y)[NB], |
| 367 | + const rajaperf::Real_type (&z)[NB], |
| 368 | + rajaperf::Real_type alpha, |
| 369 | + rajaperf::Real_type beta, |
| 370 | + const rajaperf::Real_type detj_tol, |
| 371 | + const rajaperf::Int_type quad_type, |
| 372 | + const rajaperf::Int_type quad_order, |
| 373 | + rajaperf::Real_type (&matrix)[EB][EB]) |
| 374 | +{ |
| 375 | + // Get integration points and weights |
| 376 | + rajaperf::Real_type qpts_1d[MAX_QUAD_ORDER]; |
| 377 | + rajaperf::Real_type wgts_1d[MAX_QUAD_ORDER]; |
| 378 | + |
| 379 | + get_quadrature_rule(quad_type, quad_order, qpts_1d, wgts_1d); |
| 380 | + |
| 381 | + // Compute cell centered Jacobian |
| 382 | + const rajaperf::Real_type jxx_cc = Jxx(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 383 | + const rajaperf::Real_type jxy_cc = Jxy(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 384 | + const rajaperf::Real_type jxz_cc = Jxz(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 385 | + const rajaperf::Real_type jyx_cc = Jyx(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 386 | + const rajaperf::Real_type jyy_cc = Jyy(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 387 | + const rajaperf::Real_type jyz_cc = Jyz(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 388 | + const rajaperf::Real_type jzx_cc = Jzx(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 389 | + const rajaperf::Real_type jzy_cc = Jzy(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 390 | + const rajaperf::Real_type jzz_cc = Jzz(x, y, z, 0.25, 0.25, 0.25, 0.25); |
| 391 | + |
| 392 | + // Compute cell centered Jacobian determinant |
| 393 | + const rajaperf::Real_type detj_cc = compute_detj( |
| 394 | + jxx_cc, jxy_cc, jxz_cc, |
| 395 | + jyx_cc, jyy_cc, jyz_cc, |
| 396 | + jzx_cc, jzy_cc, jzz_cc); |
| 397 | + |
| 398 | + // Initialize the stiffness matrix |
| 399 | + for (rajaperf::Int_type m = 0; m < EB; m++) { |
| 400 | + for (rajaperf::Int_type p = m; p < EB; p++) { |
| 401 | + matrix[m][p] = 0.0; |
| 402 | + } |
| 403 | + } |
| 404 | + |
| 405 | + constexpr rajaperf::Int_type symmetric_size = (EB*(EB+1))/2; |
| 406 | + rajaperf::Real_type symmetric_matrix[symmetric_size]; |
| 407 | + |
| 408 | + // Initialize the symmetric stiffness matrix |
| 409 | + for (rajaperf::Int_type m = 0; m < symmetric_size; m++) { |
| 410 | + symmetric_matrix[m] = 0.0; |
| 411 | + } |
| 412 | + |
| 413 | + // Compute values at each quadrature point |
| 414 | + for ( rajaperf::Int_type i = 0; i < quad_order; i++ ) { |
| 415 | + |
| 416 | + const rajaperf::Real_type xloc = qpts_1d[i]; |
| 417 | + const rajaperf::Real_type tmpx = 1. - xloc; |
| 418 | + |
| 419 | + rajaperf::Real_type dbasisx[EB] = {0}; |
| 420 | + curl_edgebasis_x(dbasisx, tmpx, xloc); |
| 421 | + |
| 422 | + for ( rajaperf::Int_type j = 0; j < quad_order; j++ ) { |
| 423 | + |
| 424 | + const rajaperf::Real_type yloc = qpts_1d[j]; |
| 425 | + const rajaperf::Real_type wgtxy = wgts_1d[i]*wgts_1d[j]; |
| 426 | + const rajaperf::Real_type tmpy = 1. - yloc; |
| 427 | + |
| 428 | + rajaperf::Real_type tmpxy = tmpx*tmpy; |
| 429 | + rajaperf::Real_type xyloc = xloc*yloc; |
| 430 | + rajaperf::Real_type tmpxyloc = tmpx*yloc; |
| 431 | + rajaperf::Real_type xloctmpy = xloc*tmpy; |
| 432 | + |
| 433 | + const rajaperf::Real_type jzx = Jzx(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc); |
| 434 | + const rajaperf::Real_type jzy = Jzy(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc); |
| 435 | + const rajaperf::Real_type jzz = Jzz(x, y, z, tmpxy, xloctmpy, xyloc, tmpxyloc); |
| 436 | + |
| 437 | + rajaperf::Real_type ebasisz[EB] = {0}; |
| 438 | + edgebasis_z(ebasisz, tmpxy, xloctmpy, xyloc, tmpxyloc); |
| 439 | + |
| 440 | + rajaperf::Real_type dbasisy[EB] = {0}; |
| 441 | + curl_edgebasis_y(dbasisy, tmpy, yloc); |
| 442 | + |
| 443 | + // Differeniate basis with respect to z at this quadrature point |
| 444 | + |
| 445 | + for ( rajaperf::Int_type k = 0; k < quad_order; k++ ) { |
| 446 | + |
| 447 | + const rajaperf::Real_type zloc = qpts_1d[k]; |
| 448 | + const rajaperf::Real_type wgts = wgtxy*wgts_1d[k]; |
| 449 | + const rajaperf::Real_type tmpz = 1. - zloc; |
| 450 | + |
| 451 | + const rajaperf::Real_type tmpxz = tmpx*tmpz; |
| 452 | + const rajaperf::Real_type tmpyz = tmpy*tmpz; |
| 453 | + |
| 454 | + const rajaperf::Real_type xzloc = xloc*zloc; |
| 455 | + const rajaperf::Real_type yzloc = yloc*zloc; |
| 456 | + |
| 457 | + const rajaperf::Real_type tmpyzloc = tmpy*zloc; |
| 458 | + const rajaperf::Real_type tmpxzloc = tmpx*zloc; |
| 459 | + |
| 460 | + const rajaperf::Real_type yloctmpz = yloc*tmpz; |
| 461 | + const rajaperf::Real_type xloctmpz = xloc*tmpz; |
| 462 | + |
| 463 | + const rajaperf::Real_type jxx = Jxx(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc); |
| 464 | + const rajaperf::Real_type jxy = Jxy(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc); |
| 465 | + const rajaperf::Real_type jxz = Jxz(x, y, z, tmpyz, yloctmpz, tmpyzloc, yzloc); |
| 466 | + const rajaperf::Real_type jyx = Jyx(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc); |
| 467 | + const rajaperf::Real_type jyy = Jyy(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc); |
| 468 | + const rajaperf::Real_type jyz = Jyz(x, y, z, tmpxz, xloctmpz, tmpxzloc, xzloc); |
| 469 | + |
| 470 | + rajaperf::Real_type jinvxx, jinvxy, jinvxz, |
| 471 | + jinvyx, jinvyy, jinvyz, |
| 472 | + jinvzx, jinvzy, jinvzz, |
| 473 | + detj_unfixed, detj, abs_detj, invdetj; |
| 474 | + |
| 475 | + jacobian_inv( |
| 476 | + jxx, jxy, jxz, |
| 477 | + jyx, jyy, jyz, |
| 478 | + jzx, jzy, jzz, |
| 479 | + detj_cc, detj_tol, |
| 480 | + jinvxx, jinvxy, jinvxz, |
| 481 | + jinvyx, jinvyy, jinvyz, |
| 482 | + jinvzx, jinvzy, jinvzz, |
| 483 | + detj_unfixed, detj, abs_detj, invdetj); |
| 484 | + |
| 485 | + const rajaperf::Real_type detjwgts = wgts*abs_detj; |
| 486 | + |
| 487 | + rajaperf::Real_type ebasisx[EB] = {0}; |
| 488 | + edgebasis_x(ebasisx, tmpyz, yloctmpz, tmpyzloc, yzloc); |
| 489 | + |
| 490 | + rajaperf::Real_type ebasisy[EB] = {0}; |
| 491 | + edgebasis_y(ebasisy, tmpxz, xloctmpz, tmpxzloc, xzloc); |
| 492 | + |
| 493 | + rajaperf::Real_type dbasisz[EB] = {0}; |
| 494 | + curl_edgebasis_z(dbasisz, tmpz, zloc); |
| 495 | + |
| 496 | + const rajaperf::Real_type inv_abs_detj = 1./(abs_detj+ptiny); |
| 497 | + |
| 498 | + rajaperf::Real_type tebasisx[EB] = {0}; |
| 499 | + rajaperf::Real_type tebasisy[EB] = {0}; |
| 500 | + rajaperf::Real_type tebasisz[EB] = {0}; |
| 501 | + |
| 502 | + transform_edge_basis( |
| 503 | + jinvxx, jinvxy, jinvxz, |
| 504 | + jinvyx, jinvyy, jinvyz, |
| 505 | + jinvzx, jinvzy, jinvzz, |
| 506 | + ebasisx, ebasisy, ebasisz, |
| 507 | + tebasisx, tebasisy, tebasisz); |
| 508 | + |
| 509 | + rajaperf::Real_type tdbasisx[EB] = {0}; |
| 510 | + rajaperf::Real_type tdbasisy[EB] = {0}; |
| 511 | + rajaperf::Real_type tdbasisz[EB] = {0}; |
| 512 | + |
| 513 | + transform_curl_edge_basis( |
| 514 | + jxx, jxy, jxz, |
| 515 | + jyx, jyy, jyz, |
| 516 | + jzx, jzy, jzz, |
| 517 | + inv_abs_detj, |
| 518 | + dbasisx, dbasisy, dbasisz, |
| 519 | + tdbasisx, tdbasisy, tdbasisz); |
| 520 | + |
| 521 | + // the inner product: alpha*<w_i, w_j> |
| 522 | + inner_product( |
| 523 | + detjwgts*alpha, |
| 524 | + tebasisx, tebasisy, tebasisz, |
| 525 | + tebasisx, tebasisy, tebasisz, |
| 526 | + symmetric_matrix); |
| 527 | + |
| 528 | + // the inner product: beta*<Curl(w_i), Curl(w_j)> |
| 529 | + inner_product( |
| 530 | + detjwgts*beta, |
| 531 | + tdbasisx, tdbasisy, tdbasisz, |
| 532 | + tdbasisx, tdbasisy, tdbasisz, |
| 533 | + symmetric_matrix); |
| 534 | + } |
| 535 | + } |
| 536 | + } |
| 537 | + |
| 538 | + // write back to matrix |
| 539 | + rajaperf::Int_type offset = 0; |
| 540 | + for (rajaperf::Int_type p = 0; p < M; p++) { |
| 541 | + |
| 542 | + matrix[p][p] = symmetric_matrix[offset]; |
| 543 | + for (rajaperf::Int_type m = 1; m < (M-p); m++) { |
| 544 | + auto x = symmetric_matrix[offset + m]; |
| 545 | + matrix[p][m] = x; |
| 546 | + matrix[m][p] = x; |
| 547 | + } |
| 548 | + offset += (M-p); |
| 549 | + } |
| 550 | +} |
| 551 | + |
363 | 552 | #define EDGE3D_DATA_SETUP \
|
364 | 553 | Real_ptr x = m_x; \
|
365 | 554 | Real_ptr y = m_y; \
|
|
0 commit comments