Skip to content

Commit 854251d

Browse files
committed
SWE: Fix bit manipulation algorithm in parallel
1 parent f33aec5 commit 854251d

File tree

2 files changed

+52
-58
lines changed

2 files changed

+52
-58
lines changed

examples/fluids/shallow-water/shallowwater.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ int main(int argc, char **argv) {
436436
ierr = DMRestoreLocalVector(dm, &Qloc); CHKERRQ(ierr);
437437

438438
// Set up the MatShell for the associated Jacobian operator
439-
ierr = MatCreateShell(PETSC_COMM_SELF, ncompq*odofs, ncompq*odofs,
439+
ierr = MatCreateShell(comm, ncompq*odofs, ncompq*odofs,
440440
PETSC_DETERMINE, PETSC_DETERMINE, user, &J);
441441
CHKERRQ(ierr);
442442
// Set the MatShell operation needed for the Jacobian

examples/fluids/shallow-water/src/setup.c

Lines changed: 51 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -74,19 +74,18 @@ problemData problemOptions[] = {
7474
PetscErrorCode FindPanelEdgeNodes(DM dm, PhysicsContext phys_ctx,
7575
PetscInt ncomp, Mat *T) {
7676

77-
PetscInt ierr, rankid;
77+
PetscInt ierr;
7878
MPI_Comm comm;
79-
PetscInt cstart, cend, nstart, nend, lnodes, gdofs, depth, edgenodecnt = 0;
80-
PetscSection section;
79+
PetscInt cstart, cend, gdofs, depth, edgenodecnt = 0;
80+
PetscSection section, sectionloc;
8181
EdgeNode edgenodes;
8282

8383
PetscFunctionBeginUser;
8484
// Get Nelem
85-
ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr);
85+
ierr = DMGetGlobalSection(dm, &section); CHKERRQ(ierr);
86+
ierr = DMGetLocalSection(dm, &sectionloc); CHKERRQ(ierr);
8687
ierr = DMPlexGetHeightStratum(dm, 0, &cstart, &cend); CHKERRQ(ierr);
8788
ierr = DMPlexGetDepth(dm, &depth); CHKERRQ(ierr);
88-
ierr = DMPlexGetHeightStratum(dm, depth, &nstart, &nend); CHKERRQ(ierr);
89-
lnodes = nend - nstart;
9089

9190
PetscSF sf;
9291
Vec bitmapVec;
@@ -96,75 +95,70 @@ PetscErrorCode FindPanelEdgeNodes(DM dm, PhysicsContext phys_ctx,
9695
ierr = VecDestroy(&bitmapVec);
9796

9897
// Arrays for local and global bitmaps
99-
unsigned int bitmaploc[lnodes * ncomp];
100-
unsigned int bitmap[gdofs];
101-
ierr = PetscMemzero(bitmap, sizeof(bitmap)); CHKERRQ(ierr);
102-
103-
// Broadcast bitmap values to all ranks
104-
ierr = PetscSFBcastBegin(sf, MPI_UNSIGNED, bitmap, bitmaploc); CHKERRQ(ierr);
105-
ierr = PetscSFBcastEnd(sf, MPI_UNSIGNED, bitmap, bitmaploc); CHKERRQ(ierr);
98+
unsigned int *bitmaploc;
99+
unsigned int *bitmap;
100+
ierr = PetscCalloc2(gdofs, &bitmaploc, gdofs, &bitmap);
101+
CHKERRQ(ierr);
106102

107103
// Get indices
108104
for (PetscInt c = cstart; c < cend; c++) { // Traverse elements
109105
PetscInt numindices, *indices, n, panel;
110106
// Query element panel
111107
ierr = DMGetLabelValue(dm, "panel", c, &panel); CHKERRQ(ierr);
112-
ierr = DMPlexGetClosureIndices(dm, section, section, c, PETSC_TRUE,
108+
ierr = DMPlexGetClosureIndices(dm, sectionloc, section, c, PETSC_TRUE,
113109
&numindices, &indices, NULL, NULL);
114110
CHKERRQ(ierr);
115111
for (n = 0; n < numindices; n += ncomp) { // Traverse nodes per element
116-
PetscInt bitmapidx = indices[n];
112+
PetscInt bitmapidx = indices[n];
117113
bitmaploc[bitmapidx] |= (1 << panel);
118114
}
119-
ierr = DMPlexRestoreClosureIndices(dm, section, section, c, PETSC_TRUE,
115+
ierr = DMPlexRestoreClosureIndices(dm, sectionloc, section, c, PETSC_TRUE,
120116
&numindices, &indices, NULL, NULL);
121117
CHKERRQ(ierr);
122118
}
123119

124-
// Reduce on all ranks
125-
ierr = PetscSFReduceBegin(sf, MPI_UNSIGNED, bitmaploc, bitmap, MPI_BOR);
126-
CHKERRQ(ierr);
127-
ierr = PetscSFReduceEnd(sf, MPI_UNSIGNED, bitmaploc, bitmap, MPI_BOR);
128-
CHKERRQ(ierr);
129-
130-
// Rank 0 reads the 1's in the resulting bitmap and extracts edge nodes only
120+
// Reduce from all ranks
131121
PetscObjectGetComm((PetscObject)dm, &comm); CHKERRQ(ierr);
132-
MPI_Comm_rank(comm, &rankid);
133-
if (rankid == 0) {
134-
ierr = PetscMalloc1(gdofs + 24*ncomp, &edgenodes); CHKERRQ(ierr);
135-
for (PetscInt i = 0; i < gdofs; i += ncomp) {
136-
PetscInt ones = 0, panels[3];
137-
for (PetscInt p = 0; p < 6; p++) {
138-
if (bitmap[i] & 1)
139-
panels[ones++] = p;
140-
bitmap[i] >>= 1;
141-
}
142-
if (ones == 2) {
143-
edgenodes[edgenodecnt].idx = i;
144-
edgenodes[edgenodecnt].panelA = panels[0];
145-
edgenodes[edgenodecnt].panelB = panels[1];
146-
edgenodecnt++;
147-
}
148-
else if (ones == 3) {
149-
edgenodes[edgenodecnt].idx = i;
150-
edgenodes[edgenodecnt].panelA = panels[0];
151-
edgenodes[edgenodecnt].panelB = panels[1];
152-
edgenodecnt++;
153-
edgenodes[edgenodecnt].idx = i;
154-
edgenodes[edgenodecnt].panelA = panels[0];
155-
edgenodes[edgenodecnt].panelB = panels[2];
156-
edgenodecnt++;
157-
edgenodes[edgenodecnt].idx = i;
158-
edgenodes[edgenodecnt].panelA = panels[1];
159-
edgenodes[edgenodecnt].panelB = panels[2];
160-
edgenodecnt++;
161-
}
122+
MPI_Reduce(bitmaploc, bitmap, gdofs, MPI_UNSIGNED, MPI_BOR, 0, comm);
123+
124+
// Read the resulting bitmap and extract edge nodes only
125+
ierr = PetscMalloc1(gdofs + 24*ncomp, &edgenodes); CHKERRQ(ierr);
126+
for (PetscInt i = 0; i < gdofs; i += ncomp) {
127+
PetscInt ones = 0, panels[3];
128+
for (PetscInt p = 0; p < 6; p++) {
129+
if (bitmap[i] & 1)
130+
panels[ones++] = p;
131+
bitmap[i] >>= 1;
132+
}
133+
if (ones == 2) {
134+
edgenodes[edgenodecnt].idx = i;
135+
edgenodes[edgenodecnt].panelA = panels[0];
136+
edgenodes[edgenodecnt].panelB = panels[1];
137+
edgenodecnt++;
138+
}
139+
else if (ones == 3) {
140+
edgenodes[edgenodecnt].idx = i;
141+
edgenodes[edgenodecnt].panelA = panels[0];
142+
edgenodes[edgenodecnt].panelB = panels[1];
143+
edgenodecnt++;
144+
edgenodes[edgenodecnt].idx = i;
145+
edgenodes[edgenodecnt].panelA = panels[0];
146+
edgenodes[edgenodecnt].panelB = panels[2];
147+
edgenodecnt++;
148+
edgenodes[edgenodecnt].idx = i;
149+
edgenodes[edgenodecnt].panelA = panels[1];
150+
edgenodes[edgenodecnt].panelB = panels[2];
151+
edgenodecnt++;
162152
}
163-
ierr = SetupPanelCoordTransformations(dm, phys_ctx, ncomp, edgenodes,
164-
edgenodecnt, T); CHKERRQ(ierr);
165-
// Free edgenodes structure array
166-
ierr = PetscFree(edgenodes); CHKERRQ(ierr);
167153
}
154+
ierr = SetupPanelCoordTransformations(dm, phys_ctx, ncomp, edgenodes,
155+
edgenodecnt, T); CHKERRQ(ierr);
156+
157+
// Free edgenodes structure array
158+
ierr = PetscFree(edgenodes); CHKERRQ(ierr);
159+
// Free heap
160+
ierr = PetscFree(bitmap); CHKERRQ(ierr);
161+
ierr = PetscFree(bitmaploc); CHKERRQ(ierr);
168162

169163
PetscFunctionReturn(0);
170164
}

0 commit comments

Comments
 (0)