-
Notifications
You must be signed in to change notification settings - Fork 1
/
assemble_rhs.cpp
110 lines (94 loc) · 3.5 KB
/
assemble_rhs.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "assemble_rhs.h"
#include <iostream>
// Need to include C file in same translation unit as lambda
#include "poisson.c"
#define tabulate_L \
tabulate_tensor_integral_cell_otherwise_5c190cf5c2fdd7607007457553972e50fddca3b5
// tabulate_tensor_integral_cell_otherwise_d5110eeede6a6d5fef6fa5fa9759f7bdb8a989e8
class AssemblyKernel;
void assemble_rhs(cl::sycl::queue& queue,
cl::sycl::buffer<double, 1>& accum_buf,
cl::sycl::buffer<double, 2>& geom_buf,
cl::sycl::buffer<int, 2>& coord_dm_buf,
cl::sycl::buffer<double, 2>& coeff_buf,
cl::sycl::buffer<int, 2>& fi_buf)
{
queue.submit([&](cl::sycl::handler& cgh) {
auto access_geom = geom_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto access_cdm
= coord_dm_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto access_fi = fi_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto access_coeff = coeff_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto access_ac = accum_buf.get_access<cl::sycl::access::mode::write>(cgh);
cl::sycl::range<2> coord_dims = coord_dm_buf.get_range();
cl::sycl::range<2> fi_dims = fi_buf.get_range();
cl::sycl::range<1> nelem_sycl{fi_dims[0]};
int nelem_dofs = fi_dims[1];
int ncoeff = coeff_buf.get_range()[1];
int gdim = 3;
auto kern = [=](cl::sycl::id<1> wiID) {
const int i = wiID[0];
double cell_geom[12];
double w[32] = {0};
double b[32] = {0};
double c[32] = {0};
for (int j = 0; j < ncoeff; ++j)
w[j] = access_coeff[i][j];
// Pull out points for this cell
for (std::size_t j = 0; j < coord_dims[1]; ++j)
{
const std::size_t dmi = access_cdm[i][j];
for (int k = 0; k < gdim; ++k)
cell_geom[j * gdim + k] = access_geom[dmi][k];
}
// Get local values
tabulate_L(b, w, c, cell_geom, nullptr, nullptr, 0);
// Insert result into array range corresponding to each dof
for (int j = 0; j < nelem_dofs; ++j)
{
const std::size_t idx = access_fi[i][j];
access_ac[idx] = b[j];
}
};
cgh.parallel_for<AssemblyKernel>(nelem_sycl, kern);
});
try
{
queue.wait_and_throw();
}
catch (sycl::exception const& e)
{
std::cout << "Caught synchronous SYCL exception:\n"
<< e.what() << std::endl;
}
}
class AccumulationKernel;
void accumulate_rhs(cl::sycl::queue& queue, cl::sycl::buffer<double, 1>& ac_buf,
cl::sycl::buffer<double, 1>& global_vec_buf,
cl::sycl::buffer<int, 1>& offset_buf)
{
// Second kernel to accumulate RHS for each dof
cl::sycl::range<1> ndofs_sycl = offset_buf.get_range() - 1;
queue.submit([&](cl::sycl::handler& cgh) {
auto access_gv
= global_vec_buf.get_access<cl::sycl::access::mode::write>(cgh);
auto access_ac = ac_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto access_off = offset_buf.get_access<cl::sycl::access::mode::read>(cgh);
auto kern = [=](cl::sycl::id<1> wiID) {
const int i = wiID[0];
access_gv[i] = 0.0;
for (int j = access_off[i]; j < access_off[i + 1]; ++j)
access_gv[i] += access_ac[j];
};
cgh.parallel_for<AccumulationKernel>(ndofs_sycl, kern);
});
try
{
queue.wait_and_throw();
}
catch (sycl::exception const& e)
{
std::cout << "Caught synchronous SYCL exception:\n"
<< e.what() << std::endl;
}
}