Skip to content

Commit 298d92a

Browse files
bfaccini authored and Qiao Kang committed
handle errors gracefully to prevent SEGV
oob_allgather_test() does not check whether the isend() call succeeded, so on error oob_req->reqs[] may be used uninitialized, leading to a SEGV. Signed-off-by: Bruno Faccini <bfaccini@nvidia.com> (cherry picked from commit 109f4bc)
1 parent 0e6449f commit 298d92a

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

ompi/mca/coll/ucc/coll_ucc_module.c

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
* Copyright (c) 2021 Mellanox Technologies. All rights reserved.
33
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
44
* All Rights reserved.
5+
* Copyright (c) 2022-2025 NVIDIA Corporation. All rights reserved.
6+
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
57
* $COPYRIGHT$
68
*
79
* Additional copyrights may follow
@@ -165,7 +167,7 @@ static ucc_status_t oob_allgather_test(void *req)
165167
size_t msglen = oob_req->msglen;
166168
int probe_count = 5;
167169
int rank, size, sendto, recvfrom, recvdatafrom,
168-
senddatafrom, completed, probe;
170+
senddatafrom, completed, probe, rc;
169171

170172
size = ompi_comm_size(comm);
171173
rank = ompi_comm_rank(comm);
@@ -190,10 +192,16 @@ static ucc_status_t oob_allgather_test(void *req)
190192
senddatafrom = (rank - oob_req->iter + size) % size;
191193
tmprecv = (char*)oob_req->rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)msglen;
192194
tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen;
193-
MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
195+
rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
194196
MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0]));
195-
MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
197+
if (OMPI_SUCCESS != rc) {
198+
return UCC_ERR_NO_MESSAGE;
199+
}
200+
rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
196201
MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1]));
202+
if (OMPI_SUCCESS != rc) {
203+
return UCC_ERR_NO_MESSAGE;
204+
}
197205
}
198206
probe = 0;
199207
do {
@@ -221,6 +229,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen,
221229
oob_req->msglen = msglen;
222230
oob_req->oob_coll_ctx = oob_coll_ctx;
223231
oob_req->iter = 0;
232+
oob_req->reqs[0] = MPI_REQUEST_NULL;
233+
oob_req->reqs[1] = MPI_REQUEST_NULL;
224234
*req = oob_req;
225235
return UCC_OK;
226236
}

0 commit comments

Comments
 (0)