inducer · inducer · Jun 2, 2021 · May 21, 2021 · May 21, 2021 · May 21, 2021
diff --git a/examples/simple-dg.py b/examples/simple-dg.py
@@ -40,6 +40,7 @@
         with_container_arithmetic,
         dataclass_array_container,
         )
+from arraycontext.metadata import FirstAxisIsElementsTag
 
 import logging
 logger = logging.getLogger(__name__)
@@ -243,28 +244,22 @@ def inverse_mass(self, vec):
         if not isinstance(vec, DOFArray):
             return map_array_container(self.inverse_mass, vec)
 
-        @memoize_in(self, "elwise_linear_knl")
-        def knl():
-            return make_loopy_program(
-                """{[iel,idof,j]:
-                    0<=iel<nelements and
-                    0<=idof<ndiscr_nodes_out and
-                    0<=j<ndiscr_nodes_in}""",
-                "result[iel,idof] = sum(j, mat[idof, j] * vec[iel, j])",
-                name="diff")
-
+        actx = vec.array_context
+        dtype = vec.entry_dtype
         discr = self.volume_discr
 
-        result = discr.empty_like(vec)
-
-        for grp in discr.groups:
-            matrix = self.get_inverse_mass_matrix(grp, vec.entry_dtype)
-
-            vec.array_context.call_loopy(
-                    knl(),
-                    mat=matrix, result=result[grp.index], vec=vec[grp.index])
-
-        return result/self.vol_jacobian()
+        return DOFArray(
+            actx,
+            data=tuple(
+                actx.einsum(
+                    "ij,ej->ei",
+                    self.get_inverse_mass_matrix(grp, dtype),
+                    vec_i,
+                    arg_names=("mass_inv_mat", "vec"),
+                    tagged=(FirstAxisIsElementsTag(),)
+                ) for grp, vec_i in zip(discr.groups, vec)
+            )
+        ) / self.vol_jacobian()
 
     @memoize_method
     def get_local_face_mass_matrix(self, afgrp, volgrp, dtype):
@@ -296,6 +291,9 @@ def face_mass(self, vec):
         if not isinstance(vec, DOFArray):
             return map_array_container(self.face_mass, vec)
 
+        actx = vec.array_context
+        dtype = vec.entry_dtype
+
         @memoize_in(self, "face_mass_knl")
         def knl():
             return make_loopy_program(
@@ -312,24 +310,27 @@ def knl():
         all_faces_discr = all_faces_conn.to_discr
         vol_discr = all_faces_conn.from_discr
 
-        result = vol_discr.empty_like(vec)
-
         fj = self.face_jacobian("all_faces")
         vec = vec*fj
 
         assert len(all_faces_discr.groups) == len(vol_discr.groups)
 
-        for afgrp, volgrp in zip(all_faces_discr.groups, vol_discr.groups):
-            nfaces = volgrp.mesh_el_group.nfaces
-
-            matrix = self.get_local_face_mass_matrix(afgrp, volgrp, vec.entry_dtype)
-
-            vec.array_context.call_loopy(knl(),
-                    mat=matrix, result=result[volgrp.index],
-                    vec=vec[afgrp.index].reshape(
-                        nfaces, volgrp.nelements, afgrp.nunit_dofs))
-
-        return result
+        return DOFArray(
+            actx,
+            data=tuple(
+                actx.call_loopy(
+                    knl(),
+                    mat=self.get_local_face_mass_matrix(afgrp, volgrp, dtype),
+                    vec=vec_i.reshape(
+                        volgrp.mesh_el_group.nfaces,
+                        volgrp.nelements,
+                        afgrp.nunit_dofs
+                    )
+                )["result"]
+                for afgrp, volgrp, vec_i in zip(all_faces_discr.groups,
+                                                vol_discr.groups, vec)
+            )
+        )
 
 # }}}
 

diff --git a/meshmode/discretization/__init__.py b/meshmode/discretization/__init__.py
@@ -538,7 +538,7 @@ def prg():
                 actx.freeze(
                     actx.call_loopy(
                         prg(),
-                        weights=actx.from_numpy(grp.weights),
+                        weights=actx.from_numpy(grp.quadrature_rule().weights),
                         nelements=grp.nelements,
                         )["result"])
                 for grp in self.groups))

diff --git a/meshmode/discretization/connection/direct.py b/meshmode/discretization/connection/direct.py
@@ -295,16 +295,24 @@ def __call__(self, ary):
 
         actx = ary.array_context
 
-        @memoize_in(actx, (DirectDiscretizationConnection, "resample_by_mat_knl"))
-        def mat_knl():
-            knl = make_loopy_program(
-                """{[iel, idof, j]:
-                    0<=iel<nelements and
-                    0<=idof<n_to_nodes and
-                    0<=j<n_from_nodes}""",
-                "result[to_element_indices[iel], idof] \
-                    = sum(j, resample_mat[idof, j] \
-                    * ary[from_element_indices[iel], j])",
+        @memoize_in(actx,
+                (DirectDiscretizationConnection, "resample_by_mat_batch_knl"))
+        def batch_mat_knl():
+            return make_loopy_program(
+                [
+                    "{[iel_init]: 0 <= iel_init < nelements_result}",
+                    "{[idof_init]: 0 <= idof_init < n_to_nodes}",
+                    "{[iel]: 0 <= iel < nelements}",
+                    "{[idof]: 0 <= idof < n_to_nodes}",
+                    "{[jdof]: 0 <= jdof < n_from_nodes}"
+                ],
+                """
+                    result[iel_init, idof_init] = 0 {id=init}
+                    ... gbarrier {id=barrier, dep=init}
+                    result[to_element_indices[iel], idof] =  \
+                        sum(jdof, resample_mat[idof, jdof]   \
+                            * ary[from_element_indices[iel], jdof]) {dep=barrier}
+                """,
                 [
                     lp.GlobalArg("result", None,
                         shape="nelements_result, n_to_nodes",
@@ -314,21 +322,29 @@ def mat_knl():
                         offset=lp.auto),
                     lp.ValueArg("nelements_result", np.int32),
                     lp.ValueArg("nelements_vec", np.int32),
+                    lp.ValueArg("n_from_nodes", np.int32),
                     "...",
-                    ],
-                name="resample_by_mat")
-
-            return knl
+                ],
+                name="resample_by_mat_batch"
+            )
 
         @memoize_in(actx,
-                (DirectDiscretizationConnection, "resample_by_picking_knl"))
-        def pick_knl():
-            knl = make_loopy_program(
-                """{[iel, idof]:
-                    0<=iel<nelements and
-                    0<=idof<n_to_nodes}""",
-                "result[to_element_indices[iel], idof] \
-                    = ary[from_element_indices[iel], pick_list[idof]]",
+                (DirectDiscretizationConnection, "resample_by_picking_batch_knl"))
+        def batch_pick_knl():
+            return make_loopy_program(
+                [
+                    "{[iel_init]: 0 <= iel_init < nelements_result}",
+                    "{[idof_init]: 0 <= idof_init < n_to_nodes}",
+                    "{[iel]: 0 <= iel < nelements}",
+                    "{[idof]: 0 <= idof < n_to_nodes}"
+                ],
+                """
+                    result[iel_init, idof_init] = 0 {id=init}
+                    ... gbarrier {id=barrier, dep=init}
+                    result[to_element_indices[iel], idof] =              \
+                        ary[from_element_indices[iel], pick_list[idof]]  \
+                            {dep=barrier}
+                """,
                 [
                     lp.GlobalArg("result", None,
                         shape="nelements_result, n_to_nodes",
@@ -340,17 +356,15 @@ def pick_knl():
                     lp.ValueArg("nelements_vec", np.int32),
                     lp.ValueArg("n_from_nodes", np.int32),
                     "...",
-                    ],
-                name="resample_by_picking")
-
-            return knl
-
-        if self.is_surjective:
-            result = self.to_discr.empty(actx, dtype=ary.entry_dtype)
-        else:
-            result = self.to_discr.zeros(actx, dtype=ary.entry_dtype)
+                ],
+                name="resample_by_picking_batch"
+            )
 
+        group_data = []
         for i_tgrp, cgrp in enumerate(self.groups):
+            # Loop over each batch in a group and evaluate the
+            # batch-contribution
+            batched_data = []
             for i_batch, batch in enumerate(cgrp.batches):
                 if not len(batch.from_element_indices):
                     continue
@@ -359,23 +373,47 @@ def pick_knl():
                         actx, i_tgrp, i_batch)
 
                 if point_pick_indices is None:
-                    actx.call_loopy(mat_knl(),
-                            resample_mat=self._resample_matrix(
-                                actx, i_tgrp, i_batch),
-                            result=result[i_tgrp],
-                            ary=ary[batch.from_group_index],
-                            from_element_indices=batch.from_element_indices,
-                            to_element_indices=batch.to_element_indices)
+                    batch_result = actx.call_loopy(
+                        batch_mat_knl(),
+                        resample_mat=self._resample_matrix(
+                            actx, i_tgrp, i_batch
+                        ),
+                        ary=ary[batch.from_group_index],
+                        from_element_indices=batch.from_element_indices,
+                        to_element_indices=batch.to_element_indices,
+                        nelements_result=self.to_discr.groups[i_tgrp].nelements,
+                        n_to_nodes=self.to_discr.groups[i_tgrp].nunit_dofs
+                    )["result"]
 
                 else:
-                    actx.call_loopy(pick_knl(),
-                            pick_list=point_pick_indices,
-                            result=result[i_tgrp],
-                            ary=ary[batch.from_group_index],
-                            from_element_indices=batch.from_element_indices,
-                            to_element_indices=batch.to_element_indices)
-
-        return result
+                    batch_result = actx.call_loopy(
+                        batch_pick_knl(),
+                        pick_list=point_pick_indices,
+                        ary=ary[batch.from_group_index],
+                        from_element_indices=batch.from_element_indices,
+                        to_element_indices=batch.to_element_indices,
+                        nelements_result=self.to_discr.groups[i_tgrp].nelements,
+                        n_to_nodes=self.to_discr.groups[i_tgrp].nunit_dofs
+                    )["result"]
+
+                batched_data.append(batch_result)
+
+            # After computing each batched result, take the sum
+            # to get the entire contribution over the group
+            if batched_data:
+                group_data.append(sum(batched_data))
+            else:
+                # If no batched data at all, return zeros for this
+                # particular group array
+                group_data.append(
+                    actx.zeros(
+                        shape=(self.to_discr.groups[i_tgrp].nelements,
+                               self.to_discr.groups[i_tgrp].nunit_dofs),
+                        dtype=ary.entry_dtype
+                    )
+                )
+
+        return DOFArray(actx, data=tuple(group_data))
 
 # }}}
 
@@ -408,55 +446,65 @@ def make_direct_full_resample_matrix(actx, conn):
     @memoize_in(actx, (make_direct_full_resample_matrix, "oversample_mat_knl"))
     def knl():
         return make_loopy_program(
-            """{[iel, idof, j]:
-                0<=iel<nelements and
-                0<=idof<n_to_nodes and
-                0<=j<n_from_nodes}""",
-            "result[itgt_base + to_element_indices[iel]*n_to_nodes + idof, \
-                    isrc_base + from_element_indices[iel]*n_from_nodes + j] \
-                = resample_mat[idof, j]",
+            [
+                "{[idof_init]: 0 <= idof_init < nnodes_tgt}",
+                "{[jdof_init]: 0 <= jdof_init < nnodes_src}",
+                "{[iel]: 0 <= iel < nelements}",
+                "{[idof]: 0 <= idof < n_to_nodes}",
+                "{[jdof]: 0 <= jdof < n_from_nodes}"
+            ],
+            """
+                result[idof_init, jdof_init] = 0 {id=init}
+                ... gbarrier {id=barrier, dep=init}
+                result[itgt_base + to_element_indices[iel]*n_to_nodes + idof,      \
+                       isrc_base + from_element_indices[iel]*n_from_nodes + jdof]  \
+                           = resample_mat[idof, jdof] {dep=barrier}
+            """,
             [
                 lp.GlobalArg("result", None,
                     shape="nnodes_tgt, nnodes_src",
                     offset=lp.auto),
-                lp.ValueArg("itgt_base,isrc_base", np.int32),
-                lp.ValueArg("nnodes_tgt,nnodes_src", np.int32),
+                lp.ValueArg("itgt_base, isrc_base", np.int32),
+                lp.ValueArg("nnodes_tgt, nnodes_src", np.int32),
                 "...",
-                ],
-            name="oversample_mat")
+            ],
+            name="oversample_mat"
+        )
 
     to_discr_ndofs = sum(grp.nelements*grp.nunit_dofs
             for grp in conn.to_discr.groups)
     from_discr_ndofs = sum(grp.nelements*grp.nunit_dofs
             for grp in conn.from_discr.groups)
 
-    result = actx.zeros(
-            (to_discr_ndofs, from_discr_ndofs),
-            dtype=conn.to_discr.real_dtype)
-
     from_group_sizes = [
             grp.nelements*grp.nunit_dofs
             for grp in conn.from_discr.groups]
     from_group_starts = np.cumsum([0] + from_group_sizes)
 
     tgt_node_nr_base = 0
+    mats = []
     for i_tgrp, (tgrp, cgrp) in enumerate(
             zip(conn.to_discr.groups, conn.groups)):
         for i_batch, batch in enumerate(cgrp.batches):
             if not len(batch.from_element_indices):
                 continue
 
-            actx.call_loopy(knl(),
+            mats.append(
+                actx.call_loopy(
+                    knl(),
                     resample_mat=conn._resample_matrix(actx, i_tgrp, i_batch),
-                    result=result,
                     itgt_base=tgt_node_nr_base,
                     isrc_base=from_group_starts[batch.from_group_index],
                     from_element_indices=batch.from_element_indices,
-                    to_element_indices=batch.to_element_indices)
+                    to_element_indices=batch.to_element_indices,
+                    nnodes_tgt=to_discr_ndofs,
+                    nnodes_src=from_discr_ndofs,
+                )["result"]
+            )
 
         tgt_node_nr_base += tgrp.nelements*tgrp.nunit_dofs
 
-    return result
+    return sum(mats)
 
 # }}}
 

diff --git a/meshmode/discretization/connection/modal.py b/meshmode/discretization/connection/modal.py
@@ -153,7 +153,7 @@ def quad_proj_keval():
         def quadrature_matrix(grp, mgrp):
             vdm = mp.vandermonde(mgrp.basis_obj().functions,
                                  grp.unit_nodes)
-            w_diag = np.diag(grp.weights)
+            w_diag = np.diag(grp.quadrature_rule().weights)
             vtw = np.dot(vdm.T, w_diag)
             return actx.from_numpy(vtw)