Skip to content

Commit 67a7ce3

Browse files
bfaccini authored and
Qiao Kang committed
handle errors gracefully to prevent SEGV
oob_allgather_test() does not check whether the isend() call succeeded, so oob_req->reqs[] may be used uninitialized upon error, leading to a SEGV. Signed-off-by: Bruno Faccini <bfaccini@nvidia.com> (cherry picked from commit 109f4bc)
1 parent 2c9b68d commit 67a7ce3

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

ompi/mca/coll/ucc/coll_ucc_module.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
* Copyright (c) 2021 Mellanox Technologies. All rights reserved.
33
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
44
* All Rights reserved.
5-
* Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
5+
* Copyright (c) 2022-2025 NVIDIA Corporation. All rights reserved.
6+
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
67
* $COPYRIGHT$
78
*
89
* Additional copyrights may follow
@@ -220,7 +221,7 @@ static ucc_status_t oob_allgather_test(void *req)
220221
size_t msglen = oob_req->msglen;
221222
int probe_count = 5;
222223
int rank, size, sendto, recvfrom, recvdatafrom,
223-
senddatafrom, completed, probe;
224+
senddatafrom, completed, probe, rc;
224225

225226
size = ompi_comm_size(comm);
226227
rank = ompi_comm_rank(comm);
@@ -245,10 +246,16 @@ static ucc_status_t oob_allgather_test(void *req)
245246
senddatafrom = (rank - oob_req->iter + size) % size;
246247
tmprecv = (char*)oob_req->rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)msglen;
247248
tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen;
248-
MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
249+
rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
249250
MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0]));
250-
MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
251+
if (OMPI_SUCCESS != rc) {
252+
return UCC_ERR_NO_MESSAGE;
253+
}
254+
rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
251255
MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1]));
256+
if (OMPI_SUCCESS != rc) {
257+
return UCC_ERR_NO_MESSAGE;
258+
}
252259
}
253260
probe = 0;
254261
do {
@@ -276,6 +283,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen,
276283
oob_req->msglen = msglen;
277284
oob_req->oob_coll_ctx = oob_coll_ctx;
278285
oob_req->iter = 0;
286+
oob_req->reqs[0] = MPI_REQUEST_NULL;
287+
oob_req->reqs[1] = MPI_REQUEST_NULL;
279288
*req = oob_req;
280289
return UCC_OK;
281290
}

0 commit comments

Comments
 (0)