From 67a7ce34687f68ab5152dcec3030bbcb476d8b7e Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Tue, 6 May 2025 17:41:01 +0200 Subject: [PATCH 1/2] handle errors gracefully to prevent SEGV oob_allgather_test() does not check whether the isend() call succeeded, so oob_req->reqs[] may be used uninitialized upon error, leading to a SEGV. Signed-off-by: Bruno Faccini (cherry picked from commit 109f4bcb21570f4bfe111af5dd4f0a349da2597a) --- ompi/mca/coll/ucc/coll_ucc_module.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ompi/mca/coll/ucc/coll_ucc_module.c b/ompi/mca/coll/ucc/coll_ucc_module.c index fd6df963633..746f268a227 100644 --- a/ompi/mca/coll/ucc/coll_ucc_module.c +++ b/ompi/mca/coll/ucc/coll_ucc_module.c @@ -2,7 +2,8 @@ * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. * All Rights reserved. - * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2022-2025 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -220,7 +221,7 @@ static ucc_status_t oob_allgather_test(void *req) size_t msglen = oob_req->msglen; int probe_count = 5; int rank, size, sendto, recvfrom, recvdatafrom, - senddatafrom, completed, probe; + senddatafrom, completed, probe, rc; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -245,10 +246,16 @@ static ucc_status_t oob_allgather_test(void *req) senddatafrom = (rank - oob_req->iter + size) % size; tmprecv = (char*)oob_req->rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)msglen; tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen; - MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC, + rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC, MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0])); - MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom, + if (OMPI_SUCCESS != rc) { + return UCC_ERR_NO_MESSAGE; + } + rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom, MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1])); + if (OMPI_SUCCESS != rc) { + return UCC_ERR_NO_MESSAGE; + } } probe = 0; do { @@ -276,6 +283,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen, oob_req->msglen = msglen; oob_req->oob_coll_ctx = oob_coll_ctx; oob_req->iter = 0; + oob_req->reqs[0] = MPI_REQUEST_NULL; + oob_req->reqs[1] = MPI_REQUEST_NULL; *req = oob_req; return UCC_OK; } From a2d64e15377d32c0845c8f269b576754f8892aba Mon Sep 17 00:00:00 2001 From: Qiao Kang Date: Mon, 1 Dec 2025 12:47:32 -0600 Subject: [PATCH 2/2] coll/ucc: Fix indentation issue with tab. 
Signed-off-by: Qiao Kang (cherry picked from commit 3980e803c328c789bcbfbd891323de92967f596a) --- ompi/mca/coll/ucc/coll_ucc_module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/coll/ucc/coll_ucc_module.c b/ompi/mca/coll/ucc/coll_ucc_module.c index 746f268a227..675c045afc0 100644 --- a/ompi/mca/coll/ucc/coll_ucc_module.c +++ b/ompi/mca/coll/ucc/coll_ucc_module.c @@ -248,9 +248,9 @@ static ucc_status_t oob_allgather_test(void *req) tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen; rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC, MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0])); - if (OMPI_SUCCESS != rc) { + if (OMPI_SUCCESS != rc) { return UCC_ERR_NO_MESSAGE; - } + } rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom, MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1])); if (OMPI_SUCCESS != rc) {