From a3da3e42f88581d217be1b7fd31b82ec44b36062 Mon Sep 17 00:00:00 2001 From: David Eberius Date: Tue, 30 Aug 2016 15:02:47 -0400 Subject: [PATCH 1/8] Added the software events driver code along with several new counters (Note: the progress switch counter is not yet functional). Made the bytes received counters more accurate. The software events code has been moved into the opal/runtime directory. Put all software events functions in macros that become noops when SOFTWARE_EVENTS_ENABLE is not defined. This still needs to be added as an MCA parameter. Made the software counters self-reliant without the PAPI component. Some of the changes have been added directly to MPI_Init and MPI_Finalize. These are temporary until a more robust method is implemented. Added some functions to the software events driver code to allow for registration with the new PAPI sde component. --- ompi/mca/bml/bml.h | 2 + ompi/mca/pml/ob1/pml_ob1.c | 21 +- ompi/mca/pml/ob1/pml_ob1_isend.c | 11 + ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 37 +++ ompi/mca/pml/ob1/pml_ob1_recvreq.c | 46 +++ ompi/mca/pml/ob1/pml_ob1_sendreq.c | 161 +++++++++- ompi/mpi/c/allgather.c | 3 + ompi/mpi/c/allreduce.c | 3 + ompi/mpi/c/alltoall.c | 3 + ompi/mpi/c/bcast.c | 12 +- ompi/mpi/c/finalize.c | 60 ++++ ompi/mpi/c/gather.c | 3 + ompi/mpi/c/init.c | 3 + ompi/mpi/c/init_thread.c | 3 + ompi/mpi/c/irecv.c | 5 +- ompi/mpi/c/isend.c | 5 +- ompi/mpi/c/recv.c | 5 +- ompi/mpi/c/reduce.c | 3 + ompi/mpi/c/scatter.c | 3 + ompi/mpi/c/send.c | 5 +- opal/mca/base/mca_base_pvar.h | 2 +- opal/mca/btl/btl.h | 1 + opal/runtime/Makefile.am | 8 +- opal/runtime/ompi_software_events.c | 437 ++++++++++++++++++++++++++++ opal/runtime/ompi_software_events.h | 138 +++++++++ opal/threads/wait_sync.c | 4 +- 26 files changed, 966 insertions(+), 18 deletions(-) create mode 100644 opal/runtime/ompi_software_events.c create mode 100644 opal/runtime/ompi_software_events.h diff --git a/ompi/mca/bml/bml.h b/ompi/mca/bml/bml.h index df731a64a04..d48df9643d6 100644 --- a/ompi/mca/bml/bml.h +++ b/ompi/mca/bml/bml.h @@ -286,6 +286,7 @@ static inline int mca_bml_base_send_status( mca_bml_base_btl_t* bml_btl, mca_btl_base_module_t* btl = bml_btl->btl; des->des_context = (void*) bml_btl; + return btl->btl_send(btl, bml_btl->btl_endpoint, des, tag); } @@ -300,6 +301,7 @@ static inline int mca_bml_base_sendi( mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t** descriptor ) { mca_btl_base_module_t* btl = bml_btl->btl; + return btl->btl_sendi(btl, bml_btl->btl_endpoint, convertor, header, header_size, payload_size, order, flags, tag, descriptor); diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index eaf3ab26e0f..1c0d55805ed 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -36,6 +36,7 @@ #include "opal_stdint.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" +#include "opal/runtime/ompi_software_events.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" @@ -195,6 +196,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) mca_pml_ob1_recv_frag_t *frag, *next_frag; mca_pml_ob1_comm_proc_t* pml_proc; mca_pml_ob1_match_hdr_t* hdr; + opal_timer_t usecs = 0; if (NULL == pml_comm) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -264,15 +266,17 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) * situation as the cant_match is only checked when a new fragment is received from * the network. */ + SW_EVENT_TIMER_START(OMPI_OOS_MATCH_TIME, &usecs); OPAL_LIST_FOREACH(frag, &pml_proc->frags_cant_match, mca_pml_ob1_recv_frag_t) { - hdr = &frag->hdr.hdr_match; - /* If the message has the next expected seq from that proc... */ - if(hdr->hdr_seq != pml_proc->expected_sequence) - continue; - - opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag); - goto add_fragment_to_unexpected; - } + hdr = &frag->hdr.hdr_match; + /* If the message has the next expected seq from that proc... */ + if(hdr->hdr_seq != pml_proc->expected_sequence) + continue; + + opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag); + goto add_fragment_to_unexpected; + } + SW_EVENT_TIMER_STOP(OMPI_OOS_MATCH_TIME, &usecs); } else { opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag ); } @@ -659,6 +663,7 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc, /* queue request */ rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN ); + if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { MCA_PML_OB1_PROGRESS_PENDING(bml_btl); diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 3a5b0c2d7a0..e107f97967c 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -28,6 +28,7 @@ #include "pml_ob1_sendreq.h" #include "pml_ob1_recvreq.h" #include "ompi/peruse/peruse-internal.h" +#include "opal/runtime/ompi_software_events.h" /** * Single usage request. As we allow recursive calls (as an @@ -119,6 +120,16 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, rc = mca_bml_base_sendi (bml_btl, &convertor, &match, OMPI_PML_OB1_MATCH_HDR_LEN, size, MCA_BTL_NO_ORDER, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, MCA_PML_OB1_HDR_TYPE_MATCH, NULL); + + if(rc == OPAL_SUCCESS){ + if(tag >= 0){ + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, size); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, size); + } + } + if (count > 0) { opal_convertor_cleanup (&convertor); } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 1b59e3aae16..9a08d1eab00 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -39,6 +39,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/peruse/peruse-internal.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #include "pml_ob1.h" #include "pml_ob1_comm.h" @@ -231,6 +232,14 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, &iov_count, &bytes_received ); match->req_bytes_received = bytes_received; + + if(match->req_recv.req_base.req_tag >= 0){ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)(bytes_received)); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)(bytes_received)); + } + /* * Unpacking finished, make the user buffer unaccessable again. */ @@ -536,6 +545,9 @@ match_one(mca_btl_base_module_t *btl, mca_pml_ob1_comm_proc_t *proc, mca_pml_ob1_recv_frag_t* frag) { + opal_timer_t usecs = 0; + SW_EVENT_TIMER_START(OMPI_MATCH_TIME, &usecs); + mca_pml_ob1_recv_request_t *match; mca_pml_ob1_comm_t *comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; @@ -573,19 +585,31 @@ match_one(mca_btl_base_module_t *btl, num_segments); /* this frag is already processed, so we want to break out of the loop and not end up back on the unexpected queue. */ + SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); + /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ + return NULL; } PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ, &(match->req_recv.req_base), PERUSE_RECV); + SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); + /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ + return match; } /* if no match found, place on unexpected queue */ append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments, num_segments, frag); + + SW_EVENT_RECORD(OMPI_UNEXPECTED, 1); + PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); + SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); + /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ + return NULL; } while(true); } @@ -593,6 +617,9 @@ match_one(mca_btl_base_module_t *btl, static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_t *proc) { mca_pml_ob1_recv_frag_t *frag; + opal_timer_t usecs = 0; + + SW_EVENT_TIMER_START(OMPI_OOS_MATCH_TIME, &usecs); /* search the list for a fragment from the send with sequence * number next_msg_seq_expected @@ -609,9 +636,16 @@ static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_ continue; opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag); + + SW_EVENT_TIMER_STOP(OMPI_OOS_MATCH_TIME, &usecs); + /*SW_EVENT_RECORD(OMPI_OOS_MATCH_TIME, (long long)usecs);*/ + return frag; } + SW_EVENT_TIMER_STOP(OMPI_OOS_MATCH_TIME, &usecs); + /*SW_EVENT_RECORD(OMPI_OOS_MATCH_TIME, (long long)usecs);*/ + return NULL; } @@ -776,6 +810,9 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, */ append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments, num_segments, NULL); + + SW_EVENT_RECORD(OMPI_OUT_OF_SEQUENCE, 1); + OB1_MATCHING_UNLOCK(&comm->matching_lock); return OMPI_SUCCESS; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index bbc90e1e471..bded1d48fc9 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -29,6 +29,7 @@ #include "opal/mca/mpool/mpool.h" #include "opal/util/arch.h" +#include "opal/runtime/ompi_software_events.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/bml/bml.h" #include "pml_ob1_comm.h" @@ -242,6 +243,7 @@ int mca_pml_ob1_recv_request_ack_send_btl( des->des_cbfunc = mca_pml_ob1_recv_ctl_completion; rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_ACK); + if( OPAL_LIKELY( rc >= 0 ) ) { return OMPI_SUCCESS; } @@ -429,6 +431,30 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) /* send rdma request to peer */ rc = mca_bml_base_send (bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(recvreq->req_recv.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < ctl->des_segment_count; i++){ + bytes_sent += ctl->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(recvreq->req_recv.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < ctl->des_segment_count; i++){ + bytes_sent += ctl->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if (OPAL_UNLIKELY(rc < 0)) { mca_bml_base_free (bml_btl, ctl); return rc; @@ -470,6 +496,10 @@ int mca_pml_ob1_recv_request_get_frag (mca_pml_ob1_rdma_frag_t *frag) rc = mca_bml_base_get (bml_btl, frag->local_address, frag->remote_address, local_handle, (mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length, 0, MCA_BTL_NO_ORDER, mca_pml_ob1_rget_completion, frag); + + /* Increment counter for bytes_get even though they probably haven't all been received yet */ + SW_EVENT_RECORD(OMPI_BYTES_GET, frag->rdma_length); + if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { return mca_pml_ob1_recv_request_get_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE); } @@ -525,6 +555,14 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq ); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + + if(recvreq->req_recv.req_base.req_tag >= 0){ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)bytes_received); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)bytes_received); + } + /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { @@ -886,6 +924,14 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre * for this request. */ recvreq->req_bytes_received += bytes_received; + + if(recvreq->req_recv.req_base.req_tag >= 0){ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)bytes_received); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)bytes_received); + } + recv_request_pml_complete(recvreq); } diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index f358d733dab..87351135f78 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -29,6 +29,7 @@ #include "ompi_config.h" #include "opal/prefetch.h" #include "opal/mca/mpool/mpool.h" +#include "opal/runtime/ompi_software_events.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "pml_ob1.h" @@ -39,7 +40,6 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/memchecker.h" - OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL); @@ -350,6 +350,7 @@ mca_pml_ob1_copy_frag_completion( mca_btl_base_module_t* btl, * we just abort. In theory, a new queue could be created to hold this * fragment and then attempt to send it out on another BTL. */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG); + if(OPAL_UNLIKELY(rc < 0)) { opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); ompi_rte_abort(-1, NULL); @@ -449,6 +450,30 @@ int mca_pml_ob1_send_request_start_buffered( /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); @@ -495,6 +520,16 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, MCA_PML_OB1_HDR_TYPE_MATCH, &des); + + if(rc == OPAL_SUCCESS){ + if(sendreq->req_send.req_base.req_tag >= 0){ + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, size); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, size); + } + } + if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { /* signal request completion */ send_request_pml_complete(sendreq); @@ -567,6 +602,30 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent - OMPI_PML_OB1_MATCH_HDR_LEN); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent - OMPI_PML_OB1_MATCH_HDR_LEN); + } + } +#endif + if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_match_completion_free_request( bml_btl, sendreq ); @@ -627,6 +686,30 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_match_completion_free_request( bml_btl, sendreq ); @@ -731,6 +814,30 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if (OPAL_UNLIKELY(rc < 0)) { mca_bml_base_free(bml_btl, des); return rc; @@ -811,6 +918,30 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, size ); @@ -1055,6 +1186,30 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) /* initiate send - note that this may complete before the call returns */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG); + +#ifdef SOFTWARE_EVENTS_ENABLE + volatile int64_t bytes_sent; + unsigned int i; + if(attached_event[OMPI_BYTES_SENT_USER] == 1){ + if(sendreq->req_send.req_base.req_tag >= 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); + } + } + if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ + if(sendreq->req_send.req_base.req_tag < 0){ + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); + } + } +#endif + if( OPAL_LIKELY(rc >= 0) ) { /* update state */ range->range_btls[btl_idx].length -= size; @@ -1177,6 +1332,10 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag ) rc = mca_bml_base_put (bml_btl, frag->local_address, frag->remote_address, local_handle, (mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length, 0, MCA_BTL_NO_ORDER, mca_pml_ob1_put_completion, frag); + + /* Count the bytes put even though they probably haven't been sent yet */ + SW_EVENT_RECORD(OMPI_BYTES_PUT, frag->rdma_length); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { mca_pml_ob1_send_request_put_frag_failed (frag, rc); return rc; diff --git a/ompi/mpi/c/allgather.c b/ompi/mpi/c/allgather.c index 41df7adf386..5a05e2685df 100644 --- a/ompi/mpi/c/allgather.c +++ b/ompi/mpi/c/allgather.c @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -49,6 +50,8 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, { int err; + SW_EVENT_RECORD(OMPI_ALLGATHER, 1); + MEMCHECKER( int rank; ptrdiff_t ext; diff --git a/ompi/mpi/c/allreduce.c b/ompi/mpi/c/allreduce.c index edfb7020c00..c38293667ff 100644 --- a/ompi/mpi/c/allreduce.c +++ b/ompi/mpi/c/allreduce.c @@ -31,6 +31,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -47,6 +48,8 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, { int err; + SW_EVENT_RECORD(OMPI_ALLREDUCE, 1); + MEMCHECKER( memchecker_datatype(datatype); memchecker_comm(comm); diff --git a/ompi/mpi/c/alltoall.c b/ompi/mpi/c/alltoall.c index c31bb724205..471b38e00e5 100644 --- a/ompi/mpi/c/alltoall.c +++ b/ompi/mpi/c/alltoall.c @@ -33,6 +33,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -51,6 +52,8 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int err; size_t recvtype_size; + SW_EVENT_RECORD(OMPI_ALLTOALL, 1); + MEMCHECKER( memchecker_comm(comm); if (MPI_IN_PLACE != sendbuf) { diff --git a/ompi/mpi/c/bcast.c b/ompi/mpi/c/bcast.c index 6715aff90de..7f4de50e561 100644 --- a/ompi/mpi/c/bcast.c +++ b/ompi/mpi/c/bcast.c @@ -26,6 +26,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -36,12 +37,13 @@ static const char FUNC_NAME[] = "MPI_Bcast"; - int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int err; + SW_EVENT_RECORD(OMPI_BCAST, 1); + MEMCHECKER( memchecker_datatype(datatype); memchecker_comm(comm); @@ -110,5 +112,13 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_bcast(buffer, count, datatype, root, comm, comm->c_coll->coll_bcast_module); + + if(ompi_comm_rank(comm) == root){ + SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, count * sizeof(datatype) * ((int)ompi_comm_size(comm)-1)); + } + else{ + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, count * sizeof(datatype)); + } + OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index b640c6cec11..22ef9004c78 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -23,6 +23,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -36,6 +37,65 @@ static const char FUNC_NAME[] = "MPI_Finalize"; int MPI_Finalize(void) { +#ifdef SOFTWARE_EVENTS_ENABLE + int i, j, rank, world_size, offset; + long long *recv_buffer, *send_buffer; + char *filename; + FILE *fptr; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + if(rank == 0){ + send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); + recv_buffer = (long long*)malloc(world_size * OMPI_NUM_COUNTERS * sizeof(long long)); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + send_buffer[i] = events[i].value; + } + MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + } + else{ + send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + send_buffer[i] = events[i].value; + } + MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + } + + if(rank == 0){ + asprintf(&filename, "sw_events_output_XXXXXX"); + filename = mktemp(filename); + fptr = fopen(filename, "w+"); + + fprintf(fptr, "%d %d\n", world_size, OMPI_NUM_COUNTERS); + + fprintf(stdout, "OMPI Software Counters:\n"); + offset = 0; + for(j = 0; j < world_size; j++){ + fprintf(stdout, "World Rank %d:\n", j); + fprintf(fptr, "%d\n", j); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + fprintf(stdout, "%s -> %lld\n", events[i].name, recv_buffer[offset+i]); + fprintf(fptr, "%s %lld\n", events[i].name, recv_buffer[offset+i]); + } + fprintf(stdout, "\n"); + offset += OMPI_NUM_COUNTERS; + } + free(recv_buffer); + free(send_buffer); + fclose(fptr); + } + else{ + free(send_buffer); + } + + MPI_Barrier(MPI_COMM_WORLD); + + /*SW_EVENT_PRINT_ALL();*/ + + /*SW_EVENT_FINI();*/ +#endif + OPAL_CR_FINALIZE_LIBRARY(); if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/gather.c b/ompi/mpi/c/gather.c index 03a7bf63860..1890d13cbf8 100644 --- a/ompi/mpi/c/gather.c +++ b/ompi/mpi/c/gather.c @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -49,6 +50,8 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, { int err; + SW_EVENT_RECORD(OMPI_GATHER, 1); + MEMCHECKER( int rank; ptrdiff_t ext; diff --git a/ompi/mpi/c/init.c b/ompi/mpi/c/init.c index d316fb743d2..31c21b74961 100644 --- a/ompi/mpi/c/init.c +++ b/ompi/mpi/c/init.c @@ -25,6 +25,7 @@ #include #include "opal/util/show_help.h" +#include "opal/runtime/ompi_software_events.h" #include "ompi/mpi/c/bindings.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" @@ -47,6 +48,8 @@ int MPI_Init(int *argc, char ***argv) char *env; int required = MPI_THREAD_SINGLE; + SW_EVENT_INIT(); + /* check for environment overrides for required thread level. If there is, check to see that it is a valid/supported thread level. If not, default to MPI_THREAD_MULTIPLE. */ diff --git a/ompi/mpi/c/init_thread.c b/ompi/mpi/c/init_thread.c index 38c6d7b7a81..fba9d6371d0 100644 --- a/ompi/mpi/c/init_thread.c +++ b/ompi/mpi/c/init_thread.c @@ -26,6 +26,7 @@ #include "ompi_config.h" #include "opal/util/show_help.h" +#include "opal/runtime/ompi_software_events.h" #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -48,6 +49,8 @@ int MPI_Init_thread(int *argc, char ***argv, int required, { int err; + SW_EVENT_INIT(); + ompi_hook_base_mpi_init_thread_top(argc, argv, required, provided); if ( MPI_PARAM_CHECK ) { diff --git a/ompi/mpi/c/irecv.c b/ompi/mpi/c/irecv.c index 3e66e365eb1..42f0236bb9e 100644 --- a/ompi/mpi/c/irecv.c +++ b/ompi/mpi/c/irecv.c @@ -27,6 +27,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -37,12 +38,13 @@ static const char FUNC_NAME[] = "MPI_Irecv"; - int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Request *request) { int rc = MPI_SUCCESS; + SW_EVENT_RECORD(OMPI_IRECV, 1); + MEMCHECKER( memchecker_datatype(type); memchecker_comm(comm); @@ -78,5 +80,6 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); rc = MCA_PML_CALL(irecv(buf,count,type,source,tag,comm,request)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/isend.c b/ompi/mpi/c/isend.c index 5e56deed67e..18ce9b241ab 100644 --- a/ompi/mpi/c/isend.c +++ b/ompi/mpi/c/isend.c @@ -31,6 +31,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -41,12 +42,13 @@ static const char FUNC_NAME[] = "MPI_Isend"; - int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { int rc = MPI_SUCCESS; + SW_EVENT_RECORD(OMPI_ISEND, 1); + MEMCHECKER( memchecker_datatype(type); memchecker_call(&opal_memchecker_base_isdefined, buf, count, type); @@ -91,6 +93,7 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, rc = MCA_PML_CALL(isend(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/recv.c b/ompi/mpi/c/recv.c index 864fdd2cdbb..2c036f8b634 100644 --- a/ompi/mpi/c/recv.c +++ b/ompi/mpi/c/recv.c @@ -27,6 +27,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/memchecker.h" #include "ompi/request/request.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -37,12 +38,13 @@ static const char FUNC_NAME[] = "MPI_Recv"; - int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Status *status) { int rc = MPI_SUCCESS; + SW_EVENT_RECORD(OMPI_RECV, 1); + MEMCHECKER( memchecker_datatype(type); memchecker_call(&opal_memchecker_base_isaddressable, buf, count, type); @@ -77,5 +79,6 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, OPAL_CR_ENTER_LIBRARY(); rc = MCA_PML_CALL(recv(buf, count, type, source, tag, comm, status)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/reduce.c b/ompi/mpi/c/reduce.c index 92cb8024d75..e9603a8dadf 100644 --- a/ompi/mpi/c/reduce.c +++ b/ompi/mpi/c/reduce.c @@ -31,6 +31,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -47,6 +48,8 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, { int err; + SW_EVENT_RECORD(OMPI_REDUCE, 1); + MEMCHECKER( memchecker_datatype(datatype); memchecker_comm(comm); diff --git a/ompi/mpi/c/scatter.c b/ompi/mpi/c/scatter.c index 91cbf30c3dd..44a318646e4 100644 --- a/ompi/mpi/c/scatter.c +++ b/ompi/mpi/c/scatter.c @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -49,6 +50,8 @@ int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, { int err; + SW_EVENT_RECORD(OMPI_SCATTER, 1); + MEMCHECKER( memchecker_comm(comm); if(OMPI_COMM_IS_INTRA(comm)) { diff --git a/ompi/mpi/c/send.c b/ompi/mpi/c/send.c index d5b859aede4..7b73647dec5 100644 --- a/ompi/mpi/c/send.c +++ b/ompi/mpi/c/send.c @@ -30,6 +30,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "opal/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -40,12 +41,13 @@ static const char FUNC_NAME[] = "MPI_Send"; - int MPI_Send(const void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) { int rc = MPI_SUCCESS; + SW_EVENT_RECORD(OMPI_SEND, 1); + MEMCHECKER( memchecker_datatype(type); memchecker_call(&opal_memchecker_base_isdefined, buf, count, type); @@ -76,5 +78,6 @@ int MPI_Send(const void *buf, int count, MPI_Datatype type, int dest, OPAL_CR_ENTER_LIBRARY(); rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/opal/mca/base/mca_base_pvar.h b/opal/mca/base/mca_base_pvar.h index 44f23b3dfc1..e305cbcbfd6 100644 --- a/opal/mca/base/mca_base_pvar.h +++ b/opal/mca/base/mca_base_pvar.h @@ -319,7 +319,7 @@ OPAL_DECLSPEC int mca_base_pvar_register (const char *project, const char *frame * associated with a component. * * While quite similar to mca_base_pvar_register(), there is one key - * difference: pvars registered this this function will automatically + * difference: pvars registered with this function will automatically * be unregistered / made unavailable when that component is closed by * its framework. */ diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index 48564b573ed..afec7bd1d10 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -447,6 +447,7 @@ typedef struct mca_btl_base_segment_t mca_btl_base_segment_t; * des_segments */ + struct mca_btl_base_descriptor_t { opal_free_list_item_t super; mca_btl_base_segment_t *des_segments; /**< local segments */ diff --git a/opal/runtime/Makefile.am b/opal/runtime/Makefile.am index fab8ead6104..6ecb583e64f 100644 --- a/opal/runtime/Makefile.am +++ b/opal/runtime/Makefile.am @@ -36,7 +36,9 @@ headers += \ runtime/opal_cr.h \ runtime/opal_info_support.h \ runtime/opal_params.h \ - runtime/opal_progress_threads.h + runtime/opal_progress_threads.h \ + runtime/ompi_software_events.h \ + runtime/papi_sde_interface.h lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ runtime/opal_progress.c \ @@ -45,4 +47,6 @@ lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ runtime/opal_params.c \ runtime/opal_cr.c \ runtime/opal_info_support.c \ - runtime/opal_progress_threads.c + runtime/opal_progress_threads.c \ + runtime/ompi_software_events.c \ + runtime/papi_sde_interface.c diff --git a/opal/runtime/ompi_software_events.c b/opal/runtime/ompi_software_events.c new file mode 100644 index 00000000000..6289f9e21e7 --- /dev/null +++ b/opal/runtime/ompi_software_events.c @@ -0,0 +1,437 @@ +#include "ompi_software_events.h" + +OMPI_DECLSPEC const char *counter_names[OMPI_NUM_COUNTERS] = { + "OMPI_SEND", + "OMPI_RECV", + "OMPI_ISEND", + "OMPI_IRECV", + "OMPI_BCAST", + "OMPI_REDUCE", + "OMPI_ALLREDUCE", + "OMPI_SCATTER", + "OMPI_GATHER", + "OMPI_ALLTOALL", + "OMPI_ALLGATHER", + "OMPI_BYTES_RECEIVED_USER", + "OMPI_BYTES_RECEIVED_MPI", + "OMPI_BYTES_SENT_USER", + "OMPI_BYTES_SENT_MPI", + "OMPI_BYTES_PUT", + "OMPI_BYTES_GET", + "OMPI_UNEXPECTED", + "OMPI_OUT_OF_SEQUENCE", + "OMPI_MATCH_TIME", + "OMPI_OOS_MATCH_TIME", + "OMPI_PROGRESS_SWITCH" +}; + +OMPI_DECLSPEC const char *counter_descriptions[OMPI_NUM_COUNTERS] = { + "The number of times MPI_Send was called.", + "The number of times MPI_Recv was called.", + "The number of times MPI_Isend was called.", + "The number of times MPI_Irecv was called.", + "The number of times MPI_Bcast was called.", + "The number of times MPI_Reduce was called.", + "The number of times MPI_Allreduce was called.", + "The number of times MPI_Scatter was called.", + "The number of times MPI_Gather was called.", + "The number of times MPI_Alltoall was called.", + "The number of times MPI_Allgather was called.", + "The number of bytes received by the user through point-to-point communications. Note: Excludes RDMA operations.", + "The number of bytes received by MPI through collective, control, or other internal communications.", + "The number of bytes sent by the user through point-to-point communications. Note: Excludes RDMA operations.", + "The number of bytes sent by MPI through collective, control, or other internal communications.", + "The number of bytes sent/received using RDMA Put operations.", + "The number of bytes sent/received using RDMA Get operations.", + "The number of messages that arrived as unexpected messages.", + "The number of messages that arrived out of the proper sequence.", + "The number of microseconds spent matching unexpected messages.", + "The number of microseconds spent matching out of sequence messages.", + "The number of times the progress thread changed." +}; + +/* An array of integer values to denote whether an event is activated (1) or not (0) */ +OMPI_DECLSPEC unsigned int attached_event[OMPI_NUM_COUNTERS] = { 0 }; +/* An array of event structures to store the event data (name and value) */ +OMPI_DECLSPEC ompi_event_t *events = NULL; + +/* ############################################################## + * ################# Begin MPI_T Functions ###################### + * ############################################################## + */ + +static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) +{ + (void)obj_handle; + if(MCA_BASE_PVAR_HANDLE_BIND == event) + *count = 1; + + return OPAL_SUCCESS; +} + +inline long long ompi_sw_event_get_counter(int counter_id) +{ + if(events != NULL) + return events[counter_id].value; + else + return 0; /* -1 would be preferred to indicate lack of initialization, but the type needs to be unsigned */ +} + +static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) +{ + (void) obj_handle; + long long *counter_value = (long long*)value; + *counter_value = ompi_sw_event_get_counter(OMPI_SEND); + + return OPAL_SUCCESS; +} + +/* ############################################################## + * ############ Begin PAPI software_events Code ################# + * ############################################################## + */ + +/* Allocates and initializes the events data structure */ +int iter_start() +{ + int i; + + if(events == NULL){ + events = (ompi_event_t*)malloc(OMPI_NUM_COUNTERS * sizeof(ompi_event_t)); + } + else{ + fprintf(stderr, "The events data structure has already been allocated.\n"); + } + + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + events[i].name = counter_names[i]; + events[i].value = 0; + } + return 0; +} + +/* Returns the name of the next event in the data structure */ +char* iter_next() +{ + static int i = 0; + + if(i < OMPI_NUM_COUNTERS){ + i++; + return events[i-1].name; + } + else{ + /* Finished iterating through the list. Return NULL and reset i */ + i = 0; + return NULL; + } +} + +/* Frees the events data structure */ +int iter_release() +{ + free(events); + return 0; +} + +/* If an event named 'event_name' exists, attach the corresponding event's value + * to the supplied long long pointer. + */ +int attach_event(char *event_name, long long **value) +{ + int i; + + if(events == NULL){ + fprintf(stderr, "Error: The iterator hasn't been started. The event cannot be attached.\n"); + return -1; + } + + if(event_name == NULL){ + fprintf(stderr, "Error: No event name specified for attach_event.\n"); + return -1; + } + + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + if(strcmp(event_name, events[i].name) == 0){ + break; + } + } + + if(i < OMPI_NUM_COUNTERS){ + *value = &events[i].value; + attached_event[i] = 1; + + return 0; + } + else{ + fprintf(stderr, "Error: Could not find an event by that name. The event cannot be attached.\n"); + return -1; + } +} + +/* If an event with the name 'event_name' exists, reset its value to 0 + * and set the corresponding value in attached_event to 0. + */ +int detach_event(char *event_name) +{ + int i; + + if(events == NULL){ + fprintf(stderr, "Error: The iterator hasn't been started. The event cannot be detached.\n"); + return -1; + } + + if(event_name == NULL){ + fprintf(stderr, "Error: No event name specified for detach_event.\n"); + return -1; + } + + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + if(strcmp(event_name, events[i].name) == 0){ + break; + } + } + + if(i < OMPI_NUM_COUNTERS){ + attached_event[i] = 0; + events[i].value = 0; + + return 0; + } + else{ + fprintf(stderr, "Error: Could not find an event by that name. The event cannot be detached.\n"); + return -1; + } +} + +/* A structure to expose to the PAPI software_events component to use these events */ +struct PAPI_SOFTWARE_EVENT_S papi_software_events = {"ompi", {0, 0, 0}, iter_start, iter_next, iter_release, attach_event, detach_event}; + +/* ############################################################## + * ############ End of PAPI software_events Code ################ + * ############################################################## + */ + +/* ############################################################## + * ############### Begin PAPI sde Code ########################## + * ############################################################## + */ + +/* An initialization function for the PAPI sde component. + * This creates an sde handle with the name OMPI and registers all events and + * event descriptions with the sde component. + */ +void ompi_sde_init() { + int i, event_count = OMPI_NUM_COUNTERS; + void *sde_handle = (void *)papi_sde_init("OMPI", &event_count); + + /* Required registration of counters and optional counter descriptions */ + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + printf("Registering: %s (%d of %d)\n", counter_names[i], i, OMPI_NUM_COUNTERS); + papi_sde_register_counter(sde_handle, counter_names[i], &(events[i].value) ); + papi_sde_describe_counter(sde_handle, counter_names[i], counter_descriptions[i]); + } +} + +/* Define PAPI_DYNAMIC_SDE since we are assuming PAPI is linked dynamically. + * Note: In the future we should support both dynamic and static linking of PAPI. + */ +#define PAPI_DYNAMIC_SDE +/* This function will be called from papi_native_avail to list all of the OMPI + * events with their names and descriptions. In order for the dynamic version + * to work, the environment variable PAPI_SHARED_LIB must contain the full path + * to the PAPI shared library like the following: + * /path/to/papi/install/lib/libpapi.so + * + * This function will use dlsym to get the appropriate functions for initializing + * the PAPI sde component's environment and register all of the events. + */ +void* papi_sde_hook_list_events(void) +{ + int i, event_count = OMPI_NUM_COUNTERS; + char *error; + void *papi_handle; + void* (*sym_init)(char *name_of_library, int *event_count); + void (*sym_reg)( void *handle, char *event_name, long long *counter); + void (*sym_desc)(void *handle, char *event_name, char *event_description); + void *sde_handle = NULL; + + printf("papi_sde_hook_list_events\n"); + +#ifdef PAPI_DYNAMIC_SDE + printf("PAPI_DYNAMIC_SDE defined\n"); + fflush(stdout); + + char *path_to_papi = getenv("PAPI_SHARED_LIB"); + if(path_to_papi == NULL) + return NULL; + + printf("path_to_papi = %s\n", path_to_papi); + + papi_handle = dlopen(path_to_papi, RTLD_LOCAL | RTLD_LAZY); + if(!papi_handle){ + fputs(dlerror(), stderr); + exit(1); + } + printf("papi_handle opened\n"); + fflush(stdout); + + dlerror(); + sym_init = (void* (*)(char*, int*)) dlsym(papi_handle, "papi_sde_init"); + if((error = dlerror()) != NULL) { + fputs(error, stderr); + exit(1); + } + + sym_reg = (void (*)(void*, char*, long long int*)) dlsym(papi_handle, "papi_sde_register_counter"); + if((error = dlerror()) != NULL){ + fputs(error, stderr); + exit(1); + } + + sym_desc = (void (*)(void*, char*, char*)) dlsym(papi_handle, "papi_sde_describe_counter"); + if((error = dlerror()) != NULL){ + fputs(error, stderr); + exit(1); + } + + printf("symbols found\n"); + fflush(stdout); + + sde_handle = (void *) (*sym_init)("OMPI", &event_count); + printf("sde_handle opened\n"); + fflush(stdout); + if((error = dlerror()) != NULL){ + fputs(error, stderr); + exit(1); + } + + printf("sde_handle preparing to register\n"); + fflush(stdout); + + /* We need to register the counters so they can be printed in papi_native_avail + * Note: sde::: will be prepended to the names + */ + iter_start(); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + printf("Registering: %s (%d of %d)\n", counter_names[i], i+1, OMPI_NUM_COUNTERS); + (*sym_reg)(sde_handle, counter_names[i], &(events[i].value)); + (*sym_desc)(sde_handle, counter_names[i], counter_descriptions[i]); + events[i].value = 0; + } +#endif + + printf("done papi_sde_hook_list_events %s %d\n", __FILE__, __LINE__); + return sde_handle; +} + +/* ############################################################## + * ############### End of PAPI sde Code ######################### + * ############################################################## + */ + +/* ############################################################## + * ############### Begin Utility Functions ###################### + * ############################################################## + */ + +/* Initializes the OMPI software events. The default functionality is to + * turn all of the counters on. + * Note: in the future, turning events on and off should be done through + * an MCA parameter. + */ +void ompi_sw_event_init() +{ + int i; + + iter_start(); + + /* Turn all counters on */ + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + attached_event[i] = 1; + } + + (void)mca_base_pvar_register("ompi", "opal", "software_events", counter_names[OMPI_SEND], counter_descriptions[OMPI_SEND], + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_sw_event_get_send, NULL, ompi_sw_event_notify, NULL); + + /* For initializing the PAPI sde component environment */ + ompi_sde_init(); +} + +/* Calls iter_release to free all of the OMPI software events data structures */ +void ompi_sw_event_fini() +{ + iter_release(); +} + +/* Records an update to a counter using an atomic add operation. */ +void ompi_sw_event_record(unsigned int event_id, long long value) +{ + if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ + OPAL_THREAD_ADD64(&events[event_id].value, value); + } +} + +/* Starts microsecond-precision timer and stores the start value in usec */ +void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec) +{ + /* Check whether usec == 0.0 to make sure the timer hasn't started yet */ + if(OPAL_UNLIKELY(attached_event[event_id] == 1 && *usec == 0)){ + *usec = opal_timer_base_get_usec(); + } +} + +/* Stops a microsecond-precision timer and calculates the total elapsed time + * based on the starting time in usec and putting the result in usec. + */ +void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec) +{ + if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ + *usec = opal_timer_base_get_usec() - *usec; + OPAL_THREAD_ADD64(&events[event_id].value, (long long)*usec); + } +} + +/* A function to output the value of all of the counters. This is currently + * implemented in MPI_Finalize, but we need to find a better way for this to + * happen. + */ +void ompi_sw_event_print_all() +{ + /*int i, j, rank, world_size, offset; + long long *recv_buffer, *send_buffer; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + if(rank == 0){ + send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); + recv_buffer = (long long*)malloc(world_size * OMPI_NUM_COUNTERS * sizeof(long long)); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + send_buffer[i] = events[i].value; + } + MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + } + else{ + send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + send_buffer[i] = events[i].value; + } + MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + } + + if(rank == 0){ + fprintf(stdout, "OMPI Software Counters:\n"); + offset = 0; + for(j = 0; j < world_size; j++){ + fprintf(stdout, "World Rank %d:\n", j); + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + fprintf(stdout, "%s\t%lld\n", counter_names[offset+i], events[offset+i].value); + } + offset += OMPI_NUM_COUNTERS; + } + }*/ +} + diff --git a/opal/runtime/ompi_software_events.h b/opal/runtime/ompi_software_events.h new file mode 100644 index 00000000000..551b65de2d3 --- /dev/null +++ b/opal/runtime/ompi_software_events.h @@ -0,0 +1,138 @@ +#ifndef OMPI_SOFTWARE_EVENT +#define OMPI_SOFTWARE_EVENT + +#include +#include +#include +#include +#include +#include "ompi/include/ompi_config.h" +#include "opal/mca/timer/timer.h" +#include "opal/mca/base/mca_base_pvar.h" + +#include MCA_timer_IMPLEMENTATION_HEADER + +/* This enumeration serves as event ids for the various events */ +enum OMPI_COUNTERS{ + OMPI_SEND, + OMPI_RECV, + OMPI_ISEND, + OMPI_IRECV, + OMPI_BCAST, + OMPI_REDUCE, + OMPI_ALLREDUCE, + OMPI_SCATTER, + OMPI_GATHER, + OMPI_ALLTOALL, + OMPI_ALLGATHER, + OMPI_BYTES_RECEIVED_USER, + OMPI_BYTES_RECEIVED_MPI, + OMPI_BYTES_SENT_USER, + OMPI_BYTES_SENT_MPI, + OMPI_BYTES_PUT, + OMPI_BYTES_GET, + OMPI_UNEXPECTED, + OMPI_OUT_OF_SEQUENCE, + OMPI_MATCH_TIME, + OMPI_OOS_MATCH_TIME, + OMPI_PROGRESS_SWITCH, + OMPI_NUM_COUNTERS +}; + +/* A structure for storing the event data */ +typedef struct ompi_event_s{ + char *name; + long long value; +} ompi_event_t; + +/* Structure and helper functions for PAPI software_events component + * Note: This component is being superceded by the sde component. + */ +struct PAPI_SOFTWARE_EVENT_S{ + char name[32]; + int version[3]; + int (*iter_start)(void); + char* (*iter_next)(void); + int (*iter_release)(void); + int (*attach_event)(char*, long long**); + int (*detach_event)(char*); +}; + +int iter_start(void); +char* iter_next(void); +int iter_release(void); +int attach_event(char *name, long long **value); +int detach_event(char *name); + +/* End of PAPI software_events component stuff */ + +OMPI_DECLSPEC extern unsigned int attached_event[OMPI_NUM_COUNTERS]; +OMPI_DECLSPEC extern ompi_event_t *events; + +/* OMPI software event utility functions */ +void ompi_sw_event_init(void); +void ompi_sw_event_fini(void); +void ompi_sw_event_record(unsigned int event_id, long long value); +void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec); +void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec); +void ompi_sw_event_print_all(void); + +/* MPI_T utility functions */ +static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count); +long long ompi_sw_event_get_counter(int counter_id); +static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle); + +/* Functions for the PAPI sde component */ +void ompi_sde_init(void); +/* PAPI sde component interface functions */ +typedef void* papi_handle_t; + +/* This should be defined at build time through an MCA parameter */ +#define SOFTWARE_EVENTS_ENABLE + +/* Macros for using the utility functions throughout the codebase. + * If SOFTWARE_EVENTS_ENABLE is not defined, the macros become no-ops. + */ +#ifdef SOFTWARE_EVENTS_ENABLE + +#define SW_EVENT_INIT() \ + ompi_sw_event_init() + +#define SW_EVENT_FINI() \ + ompi_sw_event_fini() + +#define SW_EVENT_RECORD(event_id, value) \ + ompi_sw_event_record(event_id, value) + +#define SW_EVENT_TIMER_START(event_id, usec) \ + ompi_sw_event_timer_start(event_id, usec) + +#define SW_EVENT_TIMER_STOP(event_id, usec) \ + ompi_sw_event_timer_stop(event_id, usec) + +#define SW_EVENT_PRINT_ALL() \ + ompi_sw_event_print_all() + +#else /* Software events are not enabled */ + +#define SW_EVENT_INIT() \ + do {} while (0) + +#define SW_EVENT_FINI() \ + do {} while (0) + +#define SW_EVENT_RECORD(event_id, value) \ + do {} while (0) + +#define SW_EVENT_TIMER_START(event_id, usec) \ + do {} while (0) + +#define SW_EVENT_TIMER_STOP(event_id, usec) \ + do {} while (0) + +#define SW_EVENT_PRINT_ALL() \ + do {} while (0) + +#endif + +#endif diff --git a/opal/threads/wait_sync.c b/opal/threads/wait_sync.c index 92b6096406c..c74a7ecaafe 100644 --- a/opal/threads/wait_sync.c +++ b/opal/threads/wait_sync.c @@ -94,8 +94,10 @@ int ompi_sync_wait_mt(ompi_wait_sync_t *sync) /* In case I am the progress manager, pass the duties on */ if( sync == wait_sync_list ) { wait_sync_list = (sync == sync->next) ? NULL : sync->next; - if( NULL != wait_sync_list ) + if( NULL != wait_sync_list ){ + /* This is a possible placement for a progress switch counter */ WAIT_SYNC_PASS_OWNERSHIP(wait_sync_list); + } } OPAL_THREAD_UNLOCK(&wait_sync_lock); From c402bb0abb815a506c8cbc6a712080b7bd06623b Mon Sep 17 00:00:00 2001 From: David Eberius Date: Wed, 28 Jun 2017 13:43:08 -0400 Subject: [PATCH 2/8] Added PAPI sde driver code to ensure there are existing weak symbols for the sde function calls even if the system does not have PAPI. --- opal/runtime/ompi_software_events.c | 3 +-- opal/runtime/ompi_software_events.h | 1 + opal/runtime/papi_sde_interface.c | 30 +++++++++++++++++++++++++++++ opal/runtime/papi_sde_interface.h | 15 +++++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 opal/runtime/papi_sde_interface.c create mode 100644 opal/runtime/papi_sde_interface.h diff --git a/opal/runtime/ompi_software_events.c b/opal/runtime/ompi_software_events.c index 6289f9e21e7..0bbfd725eb9 100644 --- a/opal/runtime/ompi_software_events.c +++ b/opal/runtime/ompi_software_events.c @@ -98,8 +98,7 @@ int iter_start() if(events == NULL){ events = (ompi_event_t*)malloc(OMPI_NUM_COUNTERS * sizeof(ompi_event_t)); - } - else{ + } else { fprintf(stderr, "The events data structure has already been allocated.\n"); } diff --git a/opal/runtime/ompi_software_events.h b/opal/runtime/ompi_software_events.h index 551b65de2d3..4910a996163 100644 --- a/opal/runtime/ompi_software_events.h +++ b/opal/runtime/ompi_software_events.h @@ -5,6 +5,7 @@ #include #include #include + #include #include "ompi/include/ompi_config.h" #include "opal/mca/timer/timer.h" diff --git a/opal/runtime/papi_sde_interface.c b/opal/runtime/papi_sde_interface.c new file mode 100644 index 00000000000..7b0e86c83d2 --- /dev/null +++ b/opal/runtime/papi_sde_interface.c @@ -0,0 +1,30 @@ +#include +#include +#include "papi_sde_interface.h" + +#pragma weak papi_sde_init +#pragma weak papi_sde_register_counter +#pragma weak papi_sde_describe_counter + +OMPI_DECLSPEC papi_handle_t +__attribute__((weak)) +papi_sde_init(char *name_of_library, int *event_count) +{ + printf("weak papi_sde_init called from %s\n", __FILE__); + void * ptr = NULL; + return ptr; +} + +OMPI_DECLSPEC void +__attribute__((weak)) +papi_sde_register_counter(papi_handle_t handle, char *event_name, long long int *counter) +{ + printf("weak papi_sde_register_counter called from %s\n", __FILE__); +} + +OMPI_DECLSPEC void +__attribute__((weak)) +papi_sde_describe_counter(papi_handle_t handle, char *event_name, char *event_description) +{ + printf("weak papi_sde_describe_counter called from %s\n", __FILE__); +} diff --git a/opal/runtime/papi_sde_interface.h b/opal/runtime/papi_sde_interface.h new file mode 100644 index 00000000000..54154f8a186 --- /dev/null +++ b/opal/runtime/papi_sde_interface.h @@ -0,0 +1,15 @@ +#ifndef PAPI_SDE_INTERFACE_H +#define PAPI_SDE_INTERFACE_H + +#include "ompi/include/ompi_config.h" + +// interface to papi SDE functions +typedef void* papi_handle_t; +papi_handle_t papi_sde_init(char *name_of_library, int *event_count); +void papi_sde_register_counter(papi_handle_t handle, char *event_name, long long int *counter); +void papi_sde_describe_counter(papi_handle_t handle, char *event_name, char *event_description ); + +// required for papi_native_avail +void* papi_sde_hook_list_events( void ); + +#endif From b7b23a4c9089068cbb7d7b7143833f3f96347471 Mon Sep 17 00:00:00 2001 From: David Eberius Date: Fri, 7 Jul 2017 16:46:45 -0400 Subject: [PATCH 3/8] Temporarily removed MPI_T helper functions from being compiled since the MPI_T symbols aren't found in the OPAL level. --- opal/runtime/ompi_software_events.c | 8 +++++--- opal/runtime/ompi_software_events.h | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/opal/runtime/ompi_software_events.c b/opal/runtime/ompi_software_events.c index 0bbfd725eb9..7e78b7e5690 100644 --- a/opal/runtime/ompi_software_events.c +++ b/opal/runtime/ompi_software_events.c @@ -59,7 +59,7 @@ OMPI_DECLSPEC ompi_event_t *events = NULL; * ################# Begin MPI_T Functions ###################### * ############################################################## */ - +#if 0 static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) { (void)obj_handle; @@ -86,6 +86,8 @@ static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *valu return OPAL_SUCCESS; } +#endif + /* ############################################################## * ############ Begin PAPI software_events Code ################# * ############################################################## @@ -348,13 +350,13 @@ void ompi_sw_event_init() for(i = 0; i < OMPI_NUM_COUNTERS; i++){ attached_event[i] = 1; } - + /* (void)mca_base_pvar_register("ompi", "opal", "software_events", counter_names[OMPI_SEND], counter_descriptions[OMPI_SEND], OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, ompi_sw_event_get_send, NULL, ompi_sw_event_notify, NULL); - + */ /* For initializing the PAPI sde component environment */ ompi_sde_init(); } diff --git a/opal/runtime/ompi_software_events.h b/opal/runtime/ompi_software_events.h index 4910a996163..865991a42c6 100644 --- a/opal/runtime/ompi_software_events.h +++ b/opal/runtime/ompi_software_events.h @@ -6,7 +6,6 @@ #include #include -#include #include "ompi/include/ompi_config.h" #include "opal/mca/timer/timer.h" #include "opal/mca/base/mca_base_pvar.h" @@ -79,10 +78,11 @@ void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec); void ompi_sw_event_print_all(void); /* MPI_T utility functions */ +/* static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count); long long ompi_sw_event_get_counter(int counter_id); static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle); - +*/ /* Functions for the PAPI sde component */ void ompi_sde_init(void); /* PAPI sde component interface functions */ From bb842449c730c95fed92e63a6ae99e8f7793ff16 Mon Sep 17 00:00:00 2001 From: David Eberius Date: Tue, 11 Jul 2017 15:38:39 -0400 Subject: [PATCH 4/8] Moved the software counters driver code up to the ompi level. --- ompi/mca/pml/ob1/pml_ob1.c | 2 +- ompi/mca/pml/ob1/pml_ob1_isend.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 2 +- ompi/mca/pml/ob1/pml_ob1_sendreq.c | 2 +- ompi/mpi/c/allgather.c | 2 +- ompi/mpi/c/allreduce.c | 2 +- ompi/mpi/c/alltoall.c | 2 +- ompi/mpi/c/bcast.c | 2 +- ompi/mpi/c/finalize.c | 2 +- ompi/mpi/c/gather.c | 2 +- ompi/mpi/c/init.c | 2 +- ompi/mpi/c/init_thread.c | 2 +- ompi/mpi/c/irecv.c | 2 +- ompi/mpi/c/isend.c | 2 +- ompi/mpi/c/recv.c | 2 +- ompi/mpi/c/reduce.c | 2 +- ompi/mpi/c/scatter.c | 2 +- ompi/mpi/c/send.c | 2 +- ompi/runtime/Makefile.am | 12 ++++++++---- {opal => ompi}/runtime/ompi_software_events.c | 6 +++--- {opal => ompi}/runtime/ompi_software_events.h | 1 + {opal => ompi}/runtime/papi_sde_interface.c | 0 {opal => ompi}/runtime/papi_sde_interface.h | 0 opal/runtime/Makefile.am | 9 +++------ 25 files changed, 34 insertions(+), 32 deletions(-) rename {opal => ompi}/runtime/ompi_software_events.c (98%) rename {opal => ompi}/runtime/ompi_software_events.h (99%) rename {opal => ompi}/runtime/papi_sde_interface.c (100%) rename {opal => ompi}/runtime/papi_sde_interface.h (100%) diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 1c0d55805ed..7eb2b82fe33 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -36,7 +36,7 @@ #include "opal_stdint.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index e107f97967c..d4c3efe1e1a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -28,7 +28,7 @@ #include "pml_ob1_sendreq.h" #include "pml_ob1_recvreq.h" #include "ompi/peruse/peruse-internal.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" /** * Single usage request. As we allow recursive calls (as an diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 9a08d1eab00..69629988224 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -39,7 +39,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/peruse/peruse-internal.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "pml_ob1.h" #include "pml_ob1_comm.h" diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index bded1d48fc9..75b484b73ca 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -29,7 +29,7 @@ #include "opal/mca/mpool/mpool.h" #include "opal/util/arch.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/bml/bml.h" #include "pml_ob1_comm.h" diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 87351135f78..4d566767cde 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -29,7 +29,7 @@ #include "ompi_config.h" #include "opal/prefetch.h" #include "opal/mca/mpool/mpool.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "pml_ob1.h" diff --git a/ompi/mpi/c/allgather.c b/ompi/mpi/c/allgather.c index 5a05e2685df..07d62deb282 100644 --- a/ompi/mpi/c/allgather.c +++ b/ompi/mpi/c/allgather.c @@ -32,7 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/allreduce.c b/ompi/mpi/c/allreduce.c index c38293667ff..f485e202965 100644 --- a/ompi/mpi/c/allreduce.c +++ b/ompi/mpi/c/allreduce.c @@ -31,7 +31,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/alltoall.c b/ompi/mpi/c/alltoall.c index 471b38e00e5..5001293d117 100644 --- a/ompi/mpi/c/alltoall.c +++ b/ompi/mpi/c/alltoall.c @@ -33,7 +33,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/bcast.c b/ompi/mpi/c/bcast.c index 7f4de50e561..b0ba0b8f8e9 100644 --- a/ompi/mpi/c/bcast.c +++ b/ompi/mpi/c/bcast.c @@ -26,7 +26,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index 22ef9004c78..b35751fcebf 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -23,7 +23,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/gather.c b/ompi/mpi/c/gather.c index 1890d13cbf8..4fd06446609 100644 --- a/ompi/mpi/c/gather.c +++ b/ompi/mpi/c/gather.c @@ -32,7 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/init.c b/ompi/mpi/c/init.c index 31c21b74961..472e9a9efcc 100644 --- a/ompi/mpi/c/init.c +++ b/ompi/mpi/c/init.c @@ -25,7 +25,7 @@ #include #include "opal/util/show_help.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "ompi/mpi/c/bindings.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" diff --git a/ompi/mpi/c/init_thread.c b/ompi/mpi/c/init_thread.c index fba9d6371d0..061d6d99c4b 100644 --- a/ompi/mpi/c/init_thread.c +++ b/ompi/mpi/c/init_thread.c @@ -26,7 +26,7 @@ #include "ompi_config.h" #include "opal/util/show_help.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" diff --git a/ompi/mpi/c/irecv.c b/ompi/mpi/c/irecv.c index 42f0236bb9e..5b40bf0bf64 100644 --- a/ompi/mpi/c/irecv.c +++ b/ompi/mpi/c/irecv.c @@ -27,7 +27,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/isend.c b/ompi/mpi/c/isend.c index 18ce9b241ab..63d81dbdec6 100644 --- a/ompi/mpi/c/isend.c +++ b/ompi/mpi/c/isend.c @@ -31,7 +31,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/recv.c b/ompi/mpi/c/recv.c index 2c036f8b634..37504971b69 100644 --- a/ompi/mpi/c/recv.c +++ b/ompi/mpi/c/recv.c @@ -27,7 +27,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/memchecker.h" #include "ompi/request/request.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/reduce.c b/ompi/mpi/c/reduce.c index e9603a8dadf..76e0354a023 100644 --- a/ompi/mpi/c/reduce.c +++ b/ompi/mpi/c/reduce.c @@ -31,7 +31,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/scatter.c b/ompi/mpi/c/scatter.c index 44a318646e4..3564d233282 100644 --- a/ompi/mpi/c/scatter.c +++ b/ompi/mpi/c/scatter.c @@ -32,7 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/c/send.c b/ompi/mpi/c/send.c index 7b73647dec5..2dee463be22 100644 --- a/ompi/mpi/c/send.c +++ b/ompi/mpi/c/send.c @@ -30,7 +30,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "opal/runtime/ompi_software_events.h" +#include "ompi/runtime/ompi_software_events.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/runtime/Makefile.am b/ompi/runtime/Makefile.am index 427abba2674..0aa5e999c32 100644 --- a/ompi/runtime/Makefile.am +++ b/ompi/runtime/Makefile.am @@ -25,9 +25,11 @@ dist_ompidata_DATA += runtime/help-mpi-runtime.txt headers += \ runtime/mpiruntime.h \ - runtime/ompi_cr.h \ + runtime/ompi_cr.h \ runtime/params.h \ - runtime/ompi_info_support.h + runtime/ompi_info_support.h \ + runtime/ompi_software_events.h \ + runtime/papi_sde_interface.h lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ runtime/ompi_mpi_abort.c \ @@ -36,5 +38,7 @@ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ runtime/ompi_mpi_finalize.c \ runtime/ompi_mpi_params.c \ runtime/ompi_mpi_preconnect.c \ - runtime/ompi_cr.c \ - runtime/ompi_info_support.c + runtime/ompi_cr.c \ + runtime/ompi_info_support.c \ + runtime/ompi_software_events.c \ + runtime/papi_sde_interface.c diff --git a/opal/runtime/ompi_software_events.c b/ompi/runtime/ompi_software_events.c similarity index 98% rename from opal/runtime/ompi_software_events.c rename to ompi/runtime/ompi_software_events.c index 7e78b7e5690..dcc9ad78ca5 100644 --- a/opal/runtime/ompi_software_events.c +++ b/ompi/runtime/ompi_software_events.c @@ -83,7 +83,7 @@ static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *valu long long *counter_value = (long long*)value; *counter_value = ompi_sw_event_get_counter(OMPI_SEND); - return OPAL_SUCCESS; + return OMPI_SUCCESS; } #endif @@ -227,7 +227,7 @@ void ompi_sde_init() { /* Required registration of counters and optional counter descriptions */ for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - printf("Registering: %s (%d of %d)\n", counter_names[i], i, OMPI_NUM_COUNTERS); + //printf("Registering: %s (%d of %d)\n", counter_names[i], i, OMPI_NUM_COUNTERS); papi_sde_register_counter(sde_handle, counter_names[i], &(events[i].value) ); papi_sde_describe_counter(sde_handle, counter_names[i], counter_descriptions[i]); } @@ -351,7 +351,7 @@ void ompi_sw_event_init() attached_event[i] = 1; } /* - (void)mca_base_pvar_register("ompi", "opal", "software_events", counter_names[OMPI_SEND], counter_descriptions[OMPI_SEND], + (void)mca_base_pvar_register("ompi", "runtime", "software_events", counter_names[OMPI_SEND], counter_descriptions[OMPI_SEND], OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, diff --git a/opal/runtime/ompi_software_events.h b/ompi/runtime/ompi_software_events.h similarity index 99% rename from opal/runtime/ompi_software_events.h rename to ompi/runtime/ompi_software_events.h index 865991a42c6..ca08490decc 100644 --- a/opal/runtime/ompi_software_events.h +++ b/ompi/runtime/ompi_software_events.h @@ -6,6 +6,7 @@ #include #include +#include "ompi/include/mpi.h" #include "ompi/include/ompi_config.h" #include "opal/mca/timer/timer.h" #include "opal/mca/base/mca_base_pvar.h" diff --git a/opal/runtime/papi_sde_interface.c b/ompi/runtime/papi_sde_interface.c similarity index 100% rename from opal/runtime/papi_sde_interface.c rename to ompi/runtime/papi_sde_interface.c diff --git a/opal/runtime/papi_sde_interface.h b/ompi/runtime/papi_sde_interface.h similarity index 100% rename from opal/runtime/papi_sde_interface.h rename to ompi/runtime/papi_sde_interface.h diff --git a/opal/runtime/Makefile.am b/opal/runtime/Makefile.am index 6ecb583e64f..5bef9322ae7 100644 --- a/opal/runtime/Makefile.am +++ b/opal/runtime/Makefile.am @@ -36,9 +36,7 @@ headers += \ runtime/opal_cr.h \ runtime/opal_info_support.h \ runtime/opal_params.h \ - runtime/opal_progress_threads.h \ - runtime/ompi_software_events.h \ - runtime/papi_sde_interface.h + runtime/opal_progress_threads.h lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ runtime/opal_progress.c \ @@ -47,6 +45,5 @@ lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ runtime/opal_params.c \ runtime/opal_cr.c \ runtime/opal_info_support.c \ - runtime/opal_progress_threads.c \ - runtime/ompi_software_events.c \ - runtime/papi_sde_interface.c + runtime/opal_progress_threads.c + From 5f668c505b1ae53cb9077791fbda679d96dbc516 Mon Sep 17 00:00:00 2001 From: David Eberius Date: Wed, 23 Aug 2017 16:16:32 -0400 Subject: [PATCH 5/8] Added a new USER_OR_MPI macro for determining which counter the bytes sent/received should be put into. Added some more protection into the code. Started work on getting rid of the loops in pml_ob1_sendreq.c. --- ompi/mca/bml/bml.h | 2 -- ompi/mca/pml/ob1/pml_ob1.c | 1 - ompi/mca/pml/ob1/pml_ob1_isend.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 12 ++++----- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 39 ++++------------------------- ompi/mca/pml/ob1/pml_ob1_sendreq.c | 35 +++++++++++++++++++++----- ompi/mpi/c/bcast.c | 7 ------ ompi/mpi/c/isend.c | 1 - ompi/mpi/c/recv.c | 1 - ompi/mpi/c/send.c | 1 - ompi/runtime/ompi_software_events.c | 34 +++++++++++++++++++++---- ompi/runtime/ompi_software_events.h | 11 ++++++-- opal/mca/btl/btl.h | 1 - opal/threads/wait_sync.c | 2 +- 14 files changed, 79 insertions(+), 70 deletions(-) diff --git a/ompi/mca/bml/bml.h b/ompi/mca/bml/bml.h index d48df9643d6..df731a64a04 100644 --- a/ompi/mca/bml/bml.h +++ b/ompi/mca/bml/bml.h @@ -286,7 +286,6 @@ static inline int mca_bml_base_send_status( mca_bml_base_btl_t* bml_btl, mca_btl_base_module_t* btl = bml_btl->btl; des->des_context = (void*) bml_btl; - return btl->btl_send(btl, bml_btl->btl_endpoint, des, tag); } @@ -301,7 +300,6 @@ static inline int mca_bml_base_sendi( mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t** descriptor ) { mca_btl_base_module_t* btl = bml_btl->btl; - return btl->btl_sendi(btl, bml_btl->btl_endpoint, convertor, header, header_size, payload_size, order, flags, tag, descriptor); diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 7eb2b82fe33..fc2eed6793f 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -663,7 +663,6 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc, /* queue request */ rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN ); - if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { MCA_PML_OB1_PROGRESS_PENDING(bml_btl); diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index d4c3efe1e1a..d1f2289869a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -121,7 +121,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, size, MCA_BTL_NO_ORDER, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, MCA_PML_OB1_HDR_TYPE_MATCH, NULL); - if(rc == OPAL_SUCCESS){ + if(OPAL_LIKELY(rc == OPAL_SUCCESS)){ if(tag >= 0){ SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, size); } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 69629988224..22fa681002f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -235,8 +235,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, if(match->req_recv.req_base.req_tag >= 0){ SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)(bytes_received)); - } - else{ + } else { SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)(bytes_received)); } @@ -545,7 +544,9 @@ match_one(mca_btl_base_module_t *btl, mca_pml_ob1_comm_proc_t *proc, mca_pml_ob1_recv_frag_t* frag) { +#ifdef SOFTWARE_EVENTS_ENABLE opal_timer_t usecs = 0; +#endif SW_EVENT_TIMER_START(OMPI_MATCH_TIME, &usecs); mca_pml_ob1_recv_request_t *match; @@ -586,7 +587,6 @@ match_one(mca_btl_base_module_t *btl, /* this frag is already processed, so we want to break out of the loop and not end up back on the unexpected queue. */ SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); - /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ return NULL; } @@ -594,7 +594,6 @@ match_one(mca_btl_base_module_t *btl, PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ, &(match->req_recv.req_base), PERUSE_RECV); SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); - /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ return match; } @@ -608,7 +607,6 @@ match_one(mca_btl_base_module_t *btl, PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); SW_EVENT_TIMER_STOP(OMPI_MATCH_TIME, &usecs); - /*SW_EVENT_RECORD(OMPI_MATCH_TIME, (long long)usecs);*/ return NULL; } while(true); @@ -617,7 +615,9 @@ match_one(mca_btl_base_module_t *btl, static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_t *proc) { mca_pml_ob1_recv_frag_t *frag; +#ifdef SOFTWARE_EVENTS_ENABLE opal_timer_t usecs = 0; +#endif SW_EVENT_TIMER_START(OMPI_OOS_MATCH_TIME, &usecs); @@ -638,13 +638,11 @@ static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_ opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag); SW_EVENT_TIMER_STOP(OMPI_OOS_MATCH_TIME, &usecs); - /*SW_EVENT_RECORD(OMPI_OOS_MATCH_TIME, (long long)usecs);*/ return frag; } SW_EVENT_TIMER_STOP(OMPI_OOS_MATCH_TIME, &usecs); - /*SW_EVENT_RECORD(OMPI_OOS_MATCH_TIME, (long long)usecs);*/ return NULL; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 75b484b73ca..f72178dfd5a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -243,6 +243,7 @@ int mca_pml_ob1_recv_request_ack_send_btl( des->des_cbfunc = mca_pml_ob1_recv_ctl_completion; rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_ACK); + SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)size); if( OPAL_LIKELY( rc >= 0 ) ) { return OMPI_SUCCESS; @@ -432,28 +433,8 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) /* send rdma request to peer */ rc = mca_bml_base_send (bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT); -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(recvreq->req_recv.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < ctl->des_segment_count; i++){ - bytes_sent += ctl->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(recvreq->req_recv.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < ctl->des_segment_count; i++){ - bytes_sent += ctl->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif + /* Increment counter for bytes_put even though they probably haven't all been received yet */ + SW_EVENT_RECORD(OMPI_BYTES_PUT, frag->rdma_length); if (OPAL_UNLIKELY(rc < 0)) { mca_bml_base_free (bml_btl, ctl); @@ -556,12 +537,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - if(recvreq->req_recv.req_base.req_tag >= 0){ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)bytes_received); - } - else{ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)bytes_received); - } + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -925,12 +901,7 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre */ recvreq->req_bytes_received += bytes_received; - if(recvreq->req_recv.req_base.req_tag >= 0){ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)bytes_received); - } - else{ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)bytes_received); - } + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); recv_request_pml_complete(recvreq); } diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 4d566767cde..fe1fde464b4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -451,15 +451,19 @@ int mca_pml_ob1_send_request_start_buffered( /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered + req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; if(attached_event[OMPI_BYTES_SENT_USER] == 1){ if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } + bytes_sent = 0; + for(i = 0; i < des->des_segment_count; i++){ + bytes_sent += des->des_segments[i].seg_len; + } + bytes_sent = sendreq->req_bytes_delivered; SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); } } @@ -469,6 +473,7 @@ int mca_pml_ob1_send_request_start_buffered( for(i = 0; i < des->des_segment_count; i++){ bytes_sent += des->des_segments[i].seg_len; } + bytes_sent = sendreq->req_bytes_delivered; SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); } } @@ -522,12 +527,13 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, &des); if(rc == OPAL_SUCCESS){ - if(sendreq->req_send.req_base.req_tag >= 0){ + /*if(sendreq->req_send.req_base.req_tag >= 0){ SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, size); } else{ SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, size); - } + }*/ + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); } if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { @@ -603,6 +609,11 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered - OMPI_PML_OB1_MATCH_HDR_LEN, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; @@ -687,6 +698,9 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size,//sendreq->req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; @@ -815,6 +829,9 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; @@ -919,6 +936,9 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; @@ -1187,6 +1207,9 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) /* initiate send - note that this may complete before the call returns */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG); + /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ + #ifdef SOFTWARE_EVENTS_ENABLE volatile int64_t bytes_sent; unsigned int i; diff --git a/ompi/mpi/c/bcast.c b/ompi/mpi/c/bcast.c index b0ba0b8f8e9..ea0613e4ad6 100644 --- a/ompi/mpi/c/bcast.c +++ b/ompi/mpi/c/bcast.c @@ -113,12 +113,5 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_bcast(buffer, count, datatype, root, comm, comm->c_coll->coll_bcast_module); - if(ompi_comm_rank(comm) == root){ - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, count * sizeof(datatype) * ((int)ompi_comm_size(comm)-1)); - } - else{ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, count * sizeof(datatype)); - } - OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/isend.c b/ompi/mpi/c/isend.c index 63d81dbdec6..fe808f3e4a4 100644 --- a/ompi/mpi/c/isend.c +++ b/ompi/mpi/c/isend.c @@ -93,7 +93,6 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, rc = MCA_PML_CALL(isend(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request)); - OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/recv.c b/ompi/mpi/c/recv.c index 37504971b69..118ca9e8395 100644 --- a/ompi/mpi/c/recv.c +++ b/ompi/mpi/c/recv.c @@ -79,6 +79,5 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, OPAL_CR_ENTER_LIBRARY(); rc = MCA_PML_CALL(recv(buf, count, type, source, tag, comm, status)); - OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/send.c b/ompi/mpi/c/send.c index 2dee463be22..b85b6803657 100644 --- a/ompi/mpi/c/send.c +++ b/ompi/mpi/c/send.c @@ -78,6 +78,5 @@ int MPI_Send(const void *buf, int count, MPI_Datatype type, int dest, OPAL_CR_ENTER_LIBRARY(); rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm)); - OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/runtime/ompi_software_events.c b/ompi/runtime/ompi_software_events.c index dcc9ad78ca5..100dd51acbf 100644 --- a/ompi/runtime/ompi_software_events.c +++ b/ompi/runtime/ompi_software_events.c @@ -1,5 +1,7 @@ #include "ompi_software_events.h" +opal_timer_t sys_clock_freq_mhz = 0; + OMPI_DECLSPEC const char *counter_names[OMPI_NUM_COUNTERS] = { "OMPI_SEND", "OMPI_RECV", @@ -59,14 +61,13 @@ OMPI_DECLSPEC ompi_event_t *events = NULL; * ################# Begin MPI_T Functions ###################### * ############################################################## */ -#if 0 static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) { (void)obj_handle; if(MCA_BASE_PVAR_HANDLE_BIND == event) *count = 1; - return OPAL_SUCCESS; + return MPI_SUCCESS; } inline long long ompi_sw_event_get_counter(int counter_id) @@ -83,11 +84,9 @@ static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *valu long long *counter_value = (long long*)value; *counter_value = ompi_sw_event_get_counter(OMPI_SEND); - return OMPI_SUCCESS; + return MPI_SUCCESS; } -#endif - /* ############################################################## * ############ Begin PAPI software_events Code ################# * ############################################################## @@ -343,7 +342,18 @@ void* papi_sde_hook_list_events(void) void ompi_sw_event_init() { int i; + /* +#if OPAL_HAVE_SYS_TIMER_GET_CYCLES + printf("OPAL_HAVE_SYS_TIMER_GET_CYCLES defined\n"); +#endif +#if OPAL_HAVE_CLOCK_GETTIME + printf("OPAL_HAVE_CLOCK_GETTIME defined\n"); +#endif + printf("Clock Frequency: %d Hz\n", (int)opal_timer_base_get_freq()); + sys_clock_freq_mhz = 2300;//opal_timer_base_get_freq() / 1000000; + printf("Clock Frequency (converted): %d MHz\n", (int)sys_clock_freq_mhz); +*/ iter_start(); /* Turn all counters on */ @@ -380,6 +390,7 @@ void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec) { /* Check whether usec == 0.0 to make sure the timer hasn't started yet */ if(OPAL_UNLIKELY(attached_event[event_id] == 1 && *usec == 0)){ + //*usec = opal_timer_base_get_cycles(); *usec = opal_timer_base_get_usec(); } } @@ -390,11 +401,24 @@ void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec) void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec) { if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ + //*usec = (opal_timer_base_get_cycles() - *usec) / sys_clock_freq_mhz; *usec = opal_timer_base_get_usec() - *usec; OPAL_THREAD_ADD64(&events[event_id].value, (long long)*usec); } } +/* Checks a tag, and records the user version of the counter if it's greater + * than or equal to 0 and records the mpi version of the counter otherwise. + */ +void ompi_sw_event_user_or_mpi(int tag, long long value, unsigned int user_enum, unsigned int mpi_enum) +{ + if(tag >= 0){ + SW_EVENT_RECORD(user_enum, value); + } else { + SW_EVENT_RECORD(mpi_enum, value); + } +} + /* A function to output the value of all of the counters. This is currently * implemented in MPI_Finalize, but we need to find a better way for this to * happen. diff --git a/ompi/runtime/ompi_software_events.h b/ompi/runtime/ompi_software_events.h index ca08490decc..198b196f4e3 100644 --- a/ompi/runtime/ompi_software_events.h +++ b/ompi/runtime/ompi_software_events.h @@ -76,14 +76,15 @@ void ompi_sw_event_fini(void); void ompi_sw_event_record(unsigned int event_id, long long value); void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec); void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec); +void ompi_sw_event_user_or_mpi(int tag, long long value, unsigned int user_enum, unsigned int mpi_enum); void ompi_sw_event_print_all(void); /* MPI_T utility functions */ -/* + static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count); long long ompi_sw_event_get_counter(int counter_id); static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle); -*/ + /* Functions for the PAPI sde component */ void ompi_sde_init(void); /* PAPI sde component interface functions */ @@ -112,6 +113,9 @@ typedef void* papi_handle_t; #define SW_EVENT_TIMER_STOP(event_id, usec) \ ompi_sw_event_timer_stop(event_id, usec) +#define SW_EVENT_USER_OR_MPI(tag, value, enum_if_user, enum_if_mpi) \ + ompi_sw_event_user_or_mpi(tag, value, enum_if_user, enum_if_mpi) + #define SW_EVENT_PRINT_ALL() \ ompi_sw_event_print_all() @@ -132,6 +136,9 @@ typedef void* papi_handle_t; #define SW_EVENT_TIMER_STOP(event_id, usec) \ do {} while (0) +#define SW_EVENT_USER_OR_MPI(tag, value, enum_if_user, enum_if_mpi) \ + do {} while (0) + #define SW_EVENT_PRINT_ALL() \ do {} while (0) diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index afec7bd1d10..48564b573ed 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -447,7 +447,6 @@ typedef struct mca_btl_base_segment_t mca_btl_base_segment_t; * des_segments */ - struct mca_btl_base_descriptor_t { opal_free_list_item_t super; mca_btl_base_segment_t *des_segments; /**< local segments */ diff --git a/opal/threads/wait_sync.c b/opal/threads/wait_sync.c index c74a7ecaafe..9b5d75ece71 100644 --- a/opal/threads/wait_sync.c +++ b/opal/threads/wait_sync.c @@ -95,7 +95,7 @@ int ompi_sync_wait_mt(ompi_wait_sync_t *sync) if( sync == wait_sync_list ) { wait_sync_list = (sync == sync->next) ? NULL : sync->next; if( NULL != wait_sync_list ){ - /* This is a possible placement for a progress switch counter */ + /* This is a possible placement for an MPI_T progress switch counter */ WAIT_SYNC_PASS_OWNERSHIP(wait_sync_list); } } From daaf7c13f300dd7632e4e4c0117c0da8e554c70c Mon Sep 17 00:00:00 2001 From: David Eberius Date: Thu, 12 Oct 2017 18:47:08 -0400 Subject: [PATCH 6/8] Switched the timer units over to cycles to avoid division operations. Moved the bytes sent/received counters to reflect when ompi updates its own internal counters. Cleaned up some unnecessary print statements in the papi sde code. --- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 6 +- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 7 +- ompi/mca/pml/ob1/pml_ob1_sendreq.c | 181 ++-------------------------- ompi/mpi/c/finalize.c | 8 -- ompi/runtime/ompi_software_events.c | 30 ++--- ompi/runtime/ompi_software_events.h | 1 - ompi/runtime/papi_sde_interface.c | 6 +- 7 files changed, 38 insertions(+), 201 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 22fa681002f..318c00949b6 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -233,11 +233,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, &bytes_received ); match->req_bytes_received = bytes_received; - if(match->req_recv.req_base.req_tag >= 0){ - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_USER, (long long)(bytes_received)); - } else { - SW_EVENT_RECORD(OMPI_BYTES_RECEIVED_MPI, (long long)(bytes_received)); - } + SW_EVENT_USER_OR_MPI(match->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); /* * Unpacking finished, make the user buffer unaccessable again. diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index f72178dfd5a..d4d2cba8176 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -199,7 +199,10 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r assert ((uint64_t) rdma_size == frag->rdma_length); /* check completion status */ + OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size); + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)rdma_size, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); + if (recv_request_pml_complete_check(recvreq) == false && recvreq->req_rdma_offset < recvreq->req_send_offset) { /* schedule additional rdma operations */ @@ -377,6 +380,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_ } else { /* is receive request complete */ OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)frag->rdma_length, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); /* TODO: re-add order */ mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc, bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag, @@ -536,7 +540,6 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq ); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); /* check completion status */ @@ -616,6 +619,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, des->des_cbfunc(NULL, NULL, des, 0); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -830,6 +834,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_datatype); ); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); + SW_EVENT_USER_OR_MPI(recvreq->req_recv.req_base.req_tag, (long long)bytes_received, OMPI_BYTES_RECEIVED_USER, OMPI_BYTES_RECEIVED_MPI); } /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index fe1fde464b4..3e2f4e1582f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -206,6 +206,8 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl, } OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); /* advance the request */ OPAL_THREAD_ADD32(&sendreq->req_state, -1); @@ -262,6 +264,8 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length) /* count bytes of user data actually delivered and check for request completion */ if (OPAL_LIKELY(0 < rdma_length)) { OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, rdma_length, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); } send_request_pml_complete_check(sendreq); @@ -315,6 +319,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl, OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1); OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, req_bytes_delivered, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); if(send_request_pml_complete_check(sendreq) == false) { mca_pml_ob1_send_request_schedule(sendreq); @@ -451,34 +457,6 @@ int mca_pml_ob1_send_request_start_buffered( /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered + req_bytes_delivered, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - bytes_sent = sendreq->req_bytes_delivered; - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - bytes_sent = sendreq->req_bytes_delivered; - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif - if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); @@ -526,18 +504,9 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, MCA_PML_OB1_HDR_TYPE_MATCH, &des); - if(rc == OPAL_SUCCESS){ - /*if(sendreq->req_send.req_base.req_tag >= 0){ - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, size); - } - else{ - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, size); - }*/ - SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); - } - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { /* signal request completion */ + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); send_request_pml_complete(sendreq); return OMPI_SUCCESS; } @@ -609,33 +578,8 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered - OMPI_PML_OB1_MATCH_HDR_LEN, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent - OMPI_PML_OB1_MATCH_HDR_LEN); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent - OMPI_PML_OB1_MATCH_HDR_LEN); - } - } -#endif + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { @@ -698,31 +642,8 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size,//sendreq->req_bytes_delivered, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, size, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { @@ -829,32 +750,6 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif - if (OPAL_UNLIKELY(rc < 0)) { mca_bml_base_free(bml_btl, des); return rc; @@ -936,32 +831,6 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif - if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, size ); @@ -1207,32 +1076,6 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) /* initiate send - note that this may complete before the call returns */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG); - /*SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, sendreq->req_bytes_delivered, - OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI);*/ - -#ifdef SOFTWARE_EVENTS_ENABLE - volatile int64_t bytes_sent; - unsigned int i; - if(attached_event[OMPI_BYTES_SENT_USER] == 1){ - if(sendreq->req_send.req_base.req_tag >= 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_USER, bytes_sent); - } - } - if(attached_event[OMPI_BYTES_SENT_MPI] == 1){ - if(sendreq->req_send.req_base.req_tag < 0){ - bytes_sent = 0; - for(i = 0; i < des->des_segment_count; i++){ - bytes_sent += des->des_segments[i].seg_len; - } - SW_EVENT_RECORD(OMPI_BYTES_SENT_MPI, bytes_sent); - } - } -#endif - if( OPAL_LIKELY(rc >= 0) ) { /* update state */ range->range_btls[btl_idx].length -= size; @@ -1305,6 +1148,8 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b /* check for request completion */ OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + SW_EVENT_USER_OR_MPI(sendreq->req_send.req_base.req_tag, frag->rdma_length, + OMPI_BYTES_SENT_USER, OMPI_BYTES_SENT_MPI); send_request_pml_complete_check(sendreq); } else { diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index b35751fcebf..02f21433c00 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -41,7 +41,6 @@ int MPI_Finalize(void) int i, j, rank, world_size, offset; long long *recv_buffer, *send_buffer; char *filename; - FILE *fptr; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); @@ -64,26 +63,19 @@ int MPI_Finalize(void) if(rank == 0){ asprintf(&filename, "sw_events_output_XXXXXX"); - filename = mktemp(filename); - fptr = fopen(filename, "w+"); - - fprintf(fptr, "%d %d\n", world_size, OMPI_NUM_COUNTERS); fprintf(stdout, "OMPI Software Counters:\n"); offset = 0; for(j = 0; j < world_size; j++){ fprintf(stdout, "World Rank %d:\n", j); - fprintf(fptr, "%d\n", j); for(i = 0; i < OMPI_NUM_COUNTERS; i++){ fprintf(stdout, "%s -> %lld\n", events[i].name, recv_buffer[offset+i]); - fprintf(fptr, "%s %lld\n", events[i].name, recv_buffer[offset+i]); } fprintf(stdout, "\n"); offset += OMPI_NUM_COUNTERS; } free(recv_buffer); free(send_buffer); - fclose(fptr); } else{ free(send_buffer); diff --git a/ompi/runtime/ompi_software_events.c b/ompi/runtime/ompi_software_events.c index 100dd51acbf..f7db5fea05b 100644 --- a/ompi/runtime/ompi_software_events.c +++ b/ompi/runtime/ompi_software_events.c @@ -23,8 +23,7 @@ OMPI_DECLSPEC const char *counter_names[OMPI_NUM_COUNTERS] = { "OMPI_UNEXPECTED", "OMPI_OUT_OF_SEQUENCE", "OMPI_MATCH_TIME", - "OMPI_OOS_MATCH_TIME", - "OMPI_PROGRESS_SWITCH" + "OMPI_OOS_MATCH_TIME" }; OMPI_DECLSPEC const char *counter_descriptions[OMPI_NUM_COUNTERS] = { @@ -48,8 +47,7 @@ OMPI_DECLSPEC const char *counter_descriptions[OMPI_NUM_COUNTERS] = { "The number of messages that arrived as unexpected messages.", "The number of messages that arrived out of the proper sequence.", "The number of microseconds spent matching unexpected messages.", - "The number of microseconds spent matching out of sequence messages.", - "The number of times the progress thread changed." + "The number of microseconds spent matching out of sequence messages." }; /* An array of integer values to denote whether an event is activated (1) or not (0) */ @@ -381,29 +379,31 @@ void ompi_sw_event_fini() void ompi_sw_event_record(unsigned int event_id, long long value) { if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ - OPAL_THREAD_ADD64(&events[event_id].value, value); + OPAL_THREAD_ADD64(&(events[event_id].value), value); } } -/* Starts microsecond-precision timer and stores the start value in usec */ -void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *usec) +/* Starts cycle-precision timer and stores the start value in 'cycles' */ +void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *cycles) { - /* Check whether usec == 0.0 to make sure the timer hasn't started yet */ - if(OPAL_UNLIKELY(attached_event[event_id] == 1 && *usec == 0)){ + /* Check whether cycles == 0.0 to make sure the timer hasn't started yet */ + if(OPAL_UNLIKELY(attached_event[event_id] == 1 && *cycles == 0)){ //*usec = opal_timer_base_get_cycles(); - *usec = opal_timer_base_get_usec(); + //*usec = opal_timer_base_get_usec(); + *cycles = opal_timer_base_get_cycles(); } } -/* Stops a microsecond-precision timer and calculates the total elapsed time - * based on the starting time in usec and putting the result in usec. +/* Stops a cycle-precision timer and calculates the total elapsed time + * based on the starting time in 'cycles' and putting the result in 'cycles'. */ -void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *usec) +void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *cycles) { if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ //*usec = (opal_timer_base_get_cycles() - *usec) / sys_clock_freq_mhz; - *usec = opal_timer_base_get_usec() - *usec; - OPAL_THREAD_ADD64(&events[event_id].value, (long long)*usec); + //*usec = opal_timer_base_get_usec() - *usec; + *cycles = opal_timer_base_get_cycles() - *cycles; + OPAL_THREAD_ADD64(&events[event_id].value, (long long)*cycles); } } diff --git a/ompi/runtime/ompi_software_events.h b/ompi/runtime/ompi_software_events.h index 198b196f4e3..665ea746500 100644 --- a/ompi/runtime/ompi_software_events.h +++ b/ompi/runtime/ompi_software_events.h @@ -36,7 +36,6 @@ enum OMPI_COUNTERS{ OMPI_OUT_OF_SEQUENCE, OMPI_MATCH_TIME, OMPI_OOS_MATCH_TIME, - OMPI_PROGRESS_SWITCH, OMPI_NUM_COUNTERS }; diff --git a/ompi/runtime/papi_sde_interface.c b/ompi/runtime/papi_sde_interface.c index 7b0e86c83d2..4e8b6820156 100644 --- a/ompi/runtime/papi_sde_interface.c +++ b/ompi/runtime/papi_sde_interface.c @@ -10,7 +10,7 @@ OMPI_DECLSPEC papi_handle_t __attribute__((weak)) papi_sde_init(char *name_of_library, int *event_count) { - printf("weak papi_sde_init called from %s\n", __FILE__); + printf("Weak papi_sde_init called from %s. Weak functions will be called papi sde functionality. If you aren't using the papi sde component, disregard this message.\n", __FILE__); void * ptr = NULL; return ptr; } @@ -19,12 +19,12 @@ OMPI_DECLSPEC void __attribute__((weak)) papi_sde_register_counter(papi_handle_t handle, char *event_name, long long int *counter) { - printf("weak papi_sde_register_counter called from %s\n", __FILE__); + /*printf("weak papi_sde_register_counter called from %s\n", __FILE__);*/ } OMPI_DECLSPEC void __attribute__((weak)) papi_sde_describe_counter(papi_handle_t handle, char *event_name, char *event_description) { - printf("weak papi_sde_describe_counter called from %s\n", __FILE__); + /*printf("weak papi_sde_describe_counter called from %s\n", __FILE__);*/ } From e9b8c0c4c81c0c267be35ab101c8e8d93df52f41 Mon Sep 17 00:00:00 2001 From: David Eberius Date: Fri, 20 Oct 2017 16:27:12 -0400 Subject: [PATCH 7/8] Temporarily disabled the PAPI sde component for portability. --- ompi/runtime/ompi_software_events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ompi/runtime/ompi_software_events.c b/ompi/runtime/ompi_software_events.c index f7db5fea05b..b110f965172 100644 --- a/ompi/runtime/ompi_software_events.c +++ b/ompi/runtime/ompi_software_events.c @@ -214,6 +214,7 @@ struct PAPI_SOFTWARE_EVENT_S papi_software_events = {"ompi", {0, 0, 0}, iter_sta * ############################################################## */ +#if 0 /* An initialization function for the PAPI sde component. * This creates an sde handle with the name OMPI and registers all events and * event descriptions with the sde component. @@ -321,6 +322,7 @@ void* papi_sde_hook_list_events(void) printf("done papi_sde_hook_list_events %s %d\n", __FILE__, __LINE__); return sde_handle; } +#endif /* ############################################################## * ############### End of PAPI sde Code ######################### @@ -366,7 +368,7 @@ void ompi_sw_event_init() ompi_sw_event_get_send, NULL, ompi_sw_event_notify, NULL); */ /* For initializing the PAPI sde component environment */ - ompi_sde_init(); + //ompi_sde_init(); } /* Calls iter_release to free all of the OMPI software events data structures */ From c44f915728fff9120fa5dbc758b5b8cfc01728ce Mon Sep 17 00:00:00 2001 From: David Eberius Date: Wed, 6 Dec 2017 18:06:09 -0500 Subject: [PATCH 8/8] Added a --with-spc configure option to enable SPCs in the build. Added an MCA parameter, mpi_spc_enable, for turning on specific counters. Cleaned up the code and added more documentation. --- configure.ac | 18 ++ ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 4 +- ompi/mpi/c/finalize.c | 12 +- ompi/mpi/c/init.c | 4 +- ompi/runtime/ompi_mpi_params.c | 10 + ompi/runtime/ompi_software_events.c | 435 ++++++++-------------------- ompi/runtime/ompi_software_events.h | 66 ++--- ompi/runtime/params.h | 8 + 8 files changed, 190 insertions(+), 367 deletions(-) diff --git a/configure.ac b/configure.ac index 929ec70e8b0..4bc2042a29c 100644 --- a/configure.ac +++ b/configure.ac @@ -286,6 +286,24 @@ AS_IF([test "$enable_oshmem" != "no"], [project_oshmem_amc=true], [project_oshme m4_ifndef([project_oshmem], [project_oshmem_amc=false]) AM_CONDITIONAL([PROJECT_OSHMEM], [test "$project_oshmem_amc" = "true"]) +# Enable/Disable Software-Based Performance Counters Capability +AC_ARG_ENABLE(spc, + AC_HELP_STRING([--enable-spc], + [Enable software-based performance counters capability (default: disabled)])) +if test "$enable_spc" = "yes"; then + AC_MSG_RESULT([yes]) + SOFTWARE_EVENTS_ENABLE=1 +else + AC_MSG_RESULT([no]) + SOFTWARE_EVENTS_ENABLE=0 +fi +AC_DEFINE_UNQUOTED([SOFTWARE_EVENTS_ENABLE], + [$SOFTWARE_EVENTS_ENABLE], + [If the software-based performance counters capability should be enabled.]) +AM_CONDITIONAL(SOFTWARE_EVENTS_ENABLE, test "$SOFTWARE_EVENTS_ENABLE" = "1") + +AS_IF([test "$enable_spc" != "no"], [project_spc_amc=true], [project_spc_amc=false]) + if test "$enable_binaries" = "no" && test "$enable_dist" = "yes"; then AC_MSG_WARN([--disable-binaries is incompatible with --enable dist]) AC_MSG_ERROR([Cannot continue]) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 318c00949b6..6d83182abf8 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -540,7 +540,7 @@ match_one(mca_btl_base_module_t *btl, mca_pml_ob1_comm_proc_t *proc, mca_pml_ob1_recv_frag_t* frag) { -#ifdef SOFTWARE_EVENTS_ENABLE +#if SOFTWARE_EVENTS_ENABLE == 1 opal_timer_t usecs = 0; #endif SW_EVENT_TIMER_START(OMPI_MATCH_TIME, &usecs); @@ -611,7 +611,7 @@ match_one(mca_btl_base_module_t *btl, static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_t *proc) { mca_pml_ob1_recv_frag_t *frag; -#ifdef SOFTWARE_EVENTS_ENABLE +#if SOFTWARE_EVENTS_ENABLE == 1 opal_timer_t usecs = 0; #endif diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index 02f21433c00..a8e503d0050 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -37,10 +37,12 @@ static const char FUNC_NAME[] = "MPI_Finalize"; int MPI_Finalize(void) { -#ifdef SOFTWARE_EVENTS_ENABLE + /* If --with-spc was specified, print all of the final SPC values + * aggregated across the whole MPI run. + */ +#if SOFTWARE_EVENTS_ENABLE == 1 int i, j, rank, world_size, offset; long long *recv_buffer, *send_buffer; - char *filename; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); @@ -62,8 +64,6 @@ int MPI_Finalize(void) } if(rank == 0){ - asprintf(&filename, "sw_events_output_XXXXXX"); - fprintf(stdout, "OMPI Software Counters:\n"); offset = 0; for(j = 0; j < world_size; j++){ @@ -82,10 +82,6 @@ int MPI_Finalize(void) } MPI_Barrier(MPI_COMM_WORLD); - - /*SW_EVENT_PRINT_ALL();*/ - - /*SW_EVENT_FINI();*/ #endif OPAL_CR_FINALIZE_LIBRARY(); diff --git a/ompi/mpi/c/init.c b/ompi/mpi/c/init.c index 472e9a9efcc..da378dbf048 100644 --- a/ompi/mpi/c/init.c +++ b/ompi/mpi/c/init.c @@ -48,8 +48,6 @@ int MPI_Init(int *argc, char ***argv) char *env; int required = MPI_THREAD_SINGLE; - SW_EVENT_INIT(); - /* check for environment overrides for required thread level. If there is, check to see that it is a valid/supported thread level. If not, default to MPI_THREAD_MULTIPLE. */ @@ -86,5 +84,7 @@ int MPI_Init(int *argc, char ***argv) OPAL_CR_INIT_LIBRARY(); + SW_EVENT_INIT(); + return MPI_SUCCESS; } diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index f8376db633d..77a368d36f7 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -75,6 +75,8 @@ bool ompi_async_mpi_finalize = false; uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; bool ompi_mpi_dynamics_enabled = true; +char *ompi_mpi_spc_enable_string = NULL; + static bool show_default_mca_params = false; static bool show_file_mca_params = false; static bool show_enviro_mca_params = false; @@ -315,6 +317,14 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SYN_FLAG_DEPRECATED); } + ompi_mpi_spc_enable_string = NULL; + (void) mca_base_var_register("ompi", "mpi", NULL, "spc_enable", + "A comma delimeted string listing the SPC counters to enable.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mpi_spc_enable_string); + return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_software_events.c b/ompi/runtime/ompi_software_events.c index b110f965172..14a077dad7c 100644 --- a/ompi/runtime/ompi_software_events.c +++ b/ompi/runtime/ompi_software_events.c @@ -2,6 +2,10 @@ opal_timer_t sys_clock_freq_mhz = 0; +/* Array for converting from SPC indices to MPI_T indices */ +OMPI_DECLSPEC int mpi_t_indices[OMPI_NUM_COUNTERS] = {0}; + +/* Array of names for each counter. Used for MPI_T and PAPI sde initialization */ OMPI_DECLSPEC const char *counter_names[OMPI_NUM_COUNTERS] = { "OMPI_SEND", "OMPI_RECV", @@ -26,6 +30,7 @@ OMPI_DECLSPEC const char *counter_names[OMPI_NUM_COUNTERS] = { "OMPI_OOS_MATCH_TIME" }; +/* Array of descriptions for each counter. Used for MPI_T and PAPI sde initialization */ OMPI_DECLSPEC const char *counter_descriptions[OMPI_NUM_COUNTERS] = { "The number of times MPI_Send was called.", "The number of times MPI_Recv was called.", @@ -62,348 +67,182 @@ OMPI_DECLSPEC ompi_event_t *events = NULL; static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) { (void)obj_handle; - if(MCA_BASE_PVAR_HANDLE_BIND == event) - *count = 1; - - return MPI_SUCCESS; -} -inline long long ompi_sw_event_get_counter(int counter_id) -{ - if(events != NULL) - return events[counter_id].value; - else - return 0; /* -1 would be preferred to indicate lack of initialization, but the type needs to be unsigned */ -} + int i; -static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) -{ - (void) obj_handle; - long long *counter_value = (long long*)value; - *counter_value = ompi_sw_event_get_counter(OMPI_SEND); + /* For this event, we need to set count to the number of long long type + * values for this counter. All SPC counters are one long long, so we + * always set count to 1. + */ + if(MCA_BASE_PVAR_HANDLE_BIND == event) + *count = 1; + /* For this event, we need to turn on the counter */ + else if(MCA_BASE_PVAR_HANDLE_START == event){ + /* Loop over the mpi_t_inddices array and find the correct SPC index to turn on */ + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + if(pvar->pvar_index == mpi_t_indices[i]){ + attached_event[i] = 1; + break; + } + } + } + /* For this event, we need to turn off the counter */ + else if(MCA_BASE_PVAR_HANDLE_STOP == event){ + /* Loop over the mpi_t_inddices array and find the correct SPC index to turn off */ + for(i = 0; i < OMPI_NUM_COUNTERS; i++){ + if(pvar->pvar_index == mpi_t_indices[i]){ + attached_event[i] = 0; + break; + } + } + } return MPI_SUCCESS; } /* ############################################################## - * ############ Begin PAPI software_events Code ################# + * ################# Begin SPC Functions ######################## * ############################################################## */ -/* Allocates and initializes the events data structure */ -int iter_start() -{ - int i; - - if(events == NULL){ - events = (ompi_event_t*)malloc(OMPI_NUM_COUNTERS * sizeof(ompi_event_t)); - } else { - fprintf(stderr, "The events data structure has already been allocated.\n"); - } - - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - events[i].name = counter_names[i]; - events[i].value = 0; - } - return 0; -} - -/* Returns the name of the next event in the data structure */ -char* iter_next() -{ - static int i = 0; - - if(i < OMPI_NUM_COUNTERS){ - i++; - return events[i-1].name; - } - else{ - /* Finished iterating through the list. Return NULL and reset i */ - i = 0; - return NULL; - } -} - -/* Frees the events data structure */ -int iter_release() -{ - free(events); - return 0; -} - -/* If an event named 'event_name' exists, attach the corresponding event's value - * to the supplied long long pointer. +/* This function returns the current count of an SPC counter that has been retistered + * as an MPI_T pvar. The MPI_T index is not necessarily the same as the SPC index, + * so we need to convert from MPI_T index to SPC index and then set the 'value' argument + * to the correct value for this pvar. */ -int attach_event(char *event_name, long long **value) -{ - int i; - - if(events == NULL){ - fprintf(stderr, "Error: The iterator hasn't been started. The event cannot be attached.\n"); - return -1; - } +static int ompi_sw_event_get_count(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) +{ + (void) obj_handle; - if(event_name == NULL){ - fprintf(stderr, "Error: No event name specified for attach_event.\n"); - return -1; - } + int i; + long long *counter_value = (long long*)value; for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - if(strcmp(event_name, events[i].name) == 0){ - break; + if(pvar->pvar_index == mpi_t_indices[i]){ + /* If this is a timer-based counter, we need to convert from cycles to microseconds */ + if(i == OMPI_MATCH_TIME || i == OMPI_OOS_MATCH_TIME) + *counter_value = events[i].value / sys_clock_freq_mhz; + else + *counter_value = events[i].value; + return MPI_SUCCESS; } } - - if(i < OMPI_NUM_COUNTERS){ - *value = &events[i].value; - attached_event[i] = 1; - - return 0; - } - else{ - fprintf(stderr, "Error: Could not find an event by that name. The event cannot be attached.\n"); - return -1; - } + /* If all else fails, simply set value to 0 */ + *counter_value = 0; + return MPI_SUCCESS; } -/* If an event with the name 'event_name' exists, reset its value to 0 - * and set the corresponding value in attached_event to 0. - */ -int detach_event(char *event_name) +/* Initializes the events data structure and allocates memory for it if needed. */ +void events_init() { int i; + /* If the events data structure hasn't been allocated yet, allocate memory for it */ if(events == NULL){ - fprintf(stderr, "Error: The iterator hasn't been started. The event cannot be detached.\n"); - return -1; - } - - if(event_name == NULL){ - fprintf(stderr, "Error: No event name specified for detach_event.\n"); - return -1; + events = (ompi_event_t*)malloc(OMPI_NUM_COUNTERS * sizeof(ompi_event_t)); } - + /* The data structure has been allocated, so we simply initialize all of the counters + * with their names and an initial count of 0. + */ for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - if(strcmp(event_name, events[i].name) == 0){ - break; - } - } - - if(i < OMPI_NUM_COUNTERS){ - attached_event[i] = 0; + events[i].name = counter_names[i]; events[i].value = 0; - - return 0; - } - else{ - fprintf(stderr, "Error: Could not find an event by that name. The event cannot be detached.\n"); - return -1; - } -} - -/* A structure to expose to the PAPI software_events component to use these events */ -struct PAPI_SOFTWARE_EVENT_S papi_software_events = {"ompi", {0, 0, 0}, iter_start, iter_next, iter_release, attach_event, detach_event}; - -/* ############################################################## - * ############ End of PAPI software_events Code ################ - * ############################################################## - */ - -/* ############################################################## - * ############### Begin PAPI sde Code ########################## - * ############################################################## - */ - -#if 0 -/* An initialization function for the PAPI sde component. - * This creates an sde handle with the name OMPI and registers all events and - * event descriptions with the sde component. - */ -void ompi_sde_init() { - int i, event_count = OMPI_NUM_COUNTERS; - void *sde_handle = (void *)papi_sde_init("OMPI", &event_count); - - /* Required registration of counters and optional counter descriptions */ - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - //printf("Registering: %s (%d of %d)\n", counter_names[i], i, OMPI_NUM_COUNTERS); - papi_sde_register_counter(sde_handle, counter_names[i], &(events[i].value) ); - papi_sde_describe_counter(sde_handle, counter_names[i], counter_descriptions[i]); } } -/* Define PAPI_DYNAMIC_SDE since we are assuming PAPI is linked dynamically. - * Note: In the future we should support both dynamic and static linking of PAPI. +/* Initializes the SPC data structures and registers all counters as MPI_T pvars. + * Turns on only the counters that were specified in the mpi_spc_enable MCA parameter. */ -#define PAPI_DYNAMIC_SDE -/* This function will be called from papi_native_avail to list all of the OMPI - * events with their names and descriptions. In order for the dynamic version - * to work, the environment variable PAPI_SHARED_LIB must contain the full path - * to the PAPI shared library like the following: - * /path/to/papi/install/lib/libpapi.so - * - * This function will use dlsym to get the appropriate functions for initializing - * the PAPI sde component's environment and register all of the events. - */ -void* papi_sde_hook_list_events(void) +void ompi_sw_event_init() { - int i, event_count = OMPI_NUM_COUNTERS; - char *error; - void *papi_handle; - void* (*sym_init)(char *name_of_library, int *event_count); - void (*sym_reg)( void *handle, char *event_name, long long *counter); - void (*sym_desc)(void *handle, char *event_name, char *event_description); - void *sde_handle = NULL; + int i, j, ret, found = 0, all_on = 0; - printf("papi_sde_hook_list_events\n"); + /* Initialize the clock frequency variable as the CPU's frequency in MHz */ + sys_clock_freq_mhz = opal_timer_base_get_freq() / 1000000; -#ifdef PAPI_DYNAMIC_SDE - printf("PAPI_DYNAMIC_SDE defined\n"); - fflush(stdout); + events_init(); - char *path_to_papi = getenv("PAPI_SHARED_LIB"); - if(path_to_papi == NULL) - return NULL; + /* Get the MCA params string of counters to turn on */ + char **arg_strings = opal_argv_split(ompi_mpi_spc_enable_string, ','); + int num_args = opal_argv_count(arg_strings); - printf("path_to_papi = %s\n", path_to_papi); - - papi_handle = dlopen(path_to_papi, RTLD_LOCAL | RTLD_LAZY); - if(!papi_handle){ - fputs(dlerror(), stderr); - exit(1); - } - printf("papi_handle opened\n"); - fflush(stdout); - - dlerror(); - sym_init = (void* (*)(char*, int*)) dlsym(papi_handle, "papi_sde_init"); - if((error = dlerror()) != NULL) { - fputs(error, stderr); - exit(1); - } - - sym_reg = (void (*)(void*, char*, long long int*)) dlsym(papi_handle, "papi_sde_register_counter"); - if((error = dlerror()) != NULL){ - fputs(error, stderr); - exit(1); - } - - sym_desc = (void (*)(void*, char*, char*)) dlsym(papi_handle, "papi_sde_describe_counter"); - if((error = dlerror()) != NULL){ - fputs(error, stderr); - exit(1); - } - - printf("symbols found\n"); - fflush(stdout); - - sde_handle = (void *) (*sym_init)("OMPI", &event_count); - printf("sde_handle opened\n"); - fflush(stdout); - if((error = dlerror()) != NULL){ - fputs(error, stderr); - exit(1); - } - - printf("sde_handle preparing to register\n"); - fflush(stdout); - - /* We need to register the counters so they can be printed in papi_native_avail - * Note: sde::: will be prepended to the names + /* If there is only one argument and it is 'all', then all counters + * should be turned on. If the size is 0, then no counters will be enabled. */ - iter_start(); - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - printf("Registering: %s (%d of %d)\n", counter_names[i], i+1, OMPI_NUM_COUNTERS); - (*sym_reg)(sde_handle, counter_names[i], &(events[i].value)); - (*sym_desc)(sde_handle, counter_names[i], counter_descriptions[i]); - events[i].value = 0; + if(num_args == 1){ + if(strcmp(arg_strings[0], "all") == 0) + all_on = 1; } -#endif - - printf("done papi_sde_hook_list_events %s %d\n", __FILE__, __LINE__); - return sde_handle; -} -#endif - -/* ############################################################## - * ############### End of PAPI sde Code ######################### - * ############################################################## - */ - -/* ############################################################## - * ############### Begin Utility Functions ###################### - * ############################################################## - */ - -/* Initializes the OMPI software events. The default functionality is to - * turn all of the counters on. - * Note: in the future, turning events on and off should be done through - * an MCA parameter. - */ -void ompi_sw_event_init() -{ - int i; - /* -#if OPAL_HAVE_SYS_TIMER_GET_CYCLES - printf("OPAL_HAVE_SYS_TIMER_GET_CYCLES defined\n"); -#endif -#if OPAL_HAVE_CLOCK_GETTIME - printf("OPAL_HAVE_CLOCK_GETTIME defined\n"); -#endif - printf("Clock Frequency: %d Hz\n", (int)opal_timer_base_get_freq()); - sys_clock_freq_mhz = 2300;//opal_timer_base_get_freq() / 1000000; - printf("Clock Frequency (converted): %d MHz\n", (int)sys_clock_freq_mhz); -*/ - iter_start(); - - /* Turn all counters on */ + /* Turn on only the counters that were specified in the MCA parameter */ for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - attached_event[i] = 1; + if(all_on) + attached_event[i] = 1; + else{ + /* Note: If no arguments were given, this will be skipped */ + for(j = 0; j < num_args && found < num_args; j++){ + if(strcmp(counter_names[i], arg_strings[j]) == 0){ + attached_event[i] = 1; + found++; + } + } + } + + /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ + ret = mca_base_pvar_register("ompi", "runtime", "software_events", counter_names[i], counter_descriptions[i], + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_sw_event_get_count, NULL, ompi_sw_event_notify, NULL); + /* Initialize the mpi_t_indices array with the MPI_T indices. + * The array index indicates the SPC index, while the value indicates + * the MPI_T index. + */ + if(ret != OPAL_ERROR){ + mpi_t_indices[i] = ret; + } else{ + mpi_t_indices[i] = -1; + } } - /* - (void)mca_base_pvar_register("ompi", "runtime", "software_events", counter_names[OMPI_SEND], counter_descriptions[OMPI_SEND], - OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - ompi_sw_event_get_send, NULL, ompi_sw_event_notify, NULL); - */ - /* For initializing the PAPI sde component environment */ - //ompi_sde_init(); } -/* Calls iter_release to free all of the OMPI software events data structures */ +/* Frees any dynamically alocated OMPI software events data structures */ void ompi_sw_event_fini() { - iter_release(); + free(events); } /* Records an update to a counter using an atomic add operation. */ void ompi_sw_event_record(unsigned int event_id, long long value) { + /* Denoted unlikely because counters will often be turned off. */ if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ OPAL_THREAD_ADD64(&(events[event_id].value), value); } } -/* Starts cycle-precision timer and stores the start value in 'cycles' */ +/* Starts cycle-precision timer and stores the start value in the 'cycles' argument. + * Note: This assumes that the 'cycles' argument is initialized to 0 if the timer + * hasn't been started yet. + */ void ompi_sw_event_timer_start(unsigned int event_id, opal_timer_t *cycles) { - /* Check whether cycles == 0.0 to make sure the timer hasn't started yet */ + /* Check whether cycles == 0.0 to make sure the timer hasn't started yet. + * This is denoted unlikely because the counters will often be turned off. + */ if(OPAL_UNLIKELY(attached_event[event_id] == 1 && *cycles == 0)){ - //*usec = opal_timer_base_get_cycles(); - //*usec = opal_timer_base_get_usec(); *cycles = opal_timer_base_get_cycles(); } } /* Stops a cycle-precision timer and calculates the total elapsed time - * based on the starting time in 'cycles' and putting the result in 'cycles'. + * based on the starting time in 'cycles' and stores the result in the + * 'cycles' argument. */ void ompi_sw_event_timer_stop(unsigned int event_id, opal_timer_t *cycles) { + /* This is denoted unlikely because the counters will often be turned off. */ if(OPAL_UNLIKELY(attached_event[event_id] == 1)){ - //*usec = (opal_timer_base_get_cycles() - *usec) / sys_clock_freq_mhz; - //*usec = opal_timer_base_get_usec() - *usec; *cycles = opal_timer_base_get_cycles() - *cycles; OPAL_THREAD_ADD64(&events[event_id].value, (long long)*cycles); } @@ -420,45 +259,3 @@ void ompi_sw_event_user_or_mpi(int tag, long long value, unsigned int user_enum, SW_EVENT_RECORD(mpi_enum, value); } } - -/* A function to output the value of all of the counters. This is currently - * implemented in MPI_Finalize, but we need to find a better way for this to - * happen. - */ -void ompi_sw_event_print_all() -{ - /*int i, j, rank, world_size, offset; - long long *recv_buffer, *send_buffer; - - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - - if(rank == 0){ - send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); - recv_buffer = (long long*)malloc(world_size * OMPI_NUM_COUNTERS * sizeof(long long)); - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - send_buffer[i] = events[i].value; - } - MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); - } - else{ - send_buffer = (long long*)malloc(OMPI_NUM_COUNTERS * sizeof(long long)); - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - send_buffer[i] = events[i].value; - } - MPI_Gather(send_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_NUM_COUNTERS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); - } - - if(rank == 0){ - fprintf(stdout, "OMPI Software Counters:\n"); - offset = 0; - for(j = 0; j < world_size; j++){ - fprintf(stdout, "World Rank %d:\n", j); - for(i = 0; i < OMPI_NUM_COUNTERS; i++){ - fprintf(stdout, "%s\t%lld\n", counter_names[offset+i], events[offset+i].value); - } - offset += OMPI_NUM_COUNTERS; - } - }*/ -} - diff --git a/ompi/runtime/ompi_software_events.h b/ompi/runtime/ompi_software_events.h index 665ea746500..a0274f3df31 100644 --- a/ompi/runtime/ompi_software_events.h +++ b/ompi/runtime/ompi_software_events.h @@ -8,11 +8,33 @@ #include "ompi/include/mpi.h" #include "ompi/include/ompi_config.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/runtime/params.h" #include "opal/mca/timer/timer.h" #include "opal/mca/base/mca_base_pvar.h" +#include "opal/util/argv.h" #include MCA_timer_IMPLEMENTATION_HEADER +/* INSTRUCTIONS FOR ADDING COUNTERS + * 1.) Add a new counter name in the OMPI_COUNTERS enum before + * OMPI_NUM_COUNTERS below. + * 2.) Add corresponding counter name and descriptions to the + * counter_names and counter_descriptions arrays in + * ompi_software_events.c NOTE: The names and descriptions + * MUST be in the same array location as where you added the + * counter name in step 1. + * 3.) If this counter is based on a timer, add its enum name to + * the logic for timer-based counters in the ompi_sw_event_get_count + * function in ompi_software_events.c + * 4.) Instrument the Open MPI code base where it makes sense for + * your counter to be modified using the SW_EVENT_RECORD macro. + * Note: If your counter is timer-based you should use the + * SW_EVENT_TIMER_START and SW_EVENT_TIMER_STOP macros to record + * the time in cycles to then be converted to microseconds later + * in the ompi_sw_event_get_count function when requested by MPI_T + */ + /* This enumeration serves as event ids for the various events */ enum OMPI_COUNTERS{ OMPI_SEND, @@ -36,7 +58,7 @@ enum OMPI_COUNTERS{ OMPI_OUT_OF_SEQUENCE, OMPI_MATCH_TIME, OMPI_OOS_MATCH_TIME, - OMPI_NUM_COUNTERS + OMPI_NUM_COUNTERS /* This serves as the number of counters. It must be last. */ }; /* A structure for storing the event data */ @@ -45,31 +67,13 @@ typedef struct ompi_event_s{ long long value; } ompi_event_t; -/* Structure and helper functions for PAPI software_events component - * Note: This component is being superceded by the sde component. - */ -struct PAPI_SOFTWARE_EVENT_S{ - char name[32]; - int version[3]; - int (*iter_start)(void); - char* (*iter_next)(void); - int (*iter_release)(void); - int (*attach_event)(char*, long long**); - int (*detach_event)(char*); -}; - -int iter_start(void); -char* iter_next(void); -int iter_release(void); -int attach_event(char *name, long long **value); -int detach_event(char *name); - -/* End of PAPI software_events component stuff */ - OMPI_DECLSPEC extern unsigned int attached_event[OMPI_NUM_COUNTERS]; OMPI_DECLSPEC extern ompi_event_t *events; -/* OMPI software event utility functions */ +/* Events data structure initialization function */ +void events_init(void); + +/* OMPI software event (SPC) utility functions */ void ompi_sw_event_init(void); void ompi_sw_event_fini(void); void ompi_sw_event_record(unsigned int event_id, long long value); @@ -79,23 +83,13 @@ void ompi_sw_event_user_or_mpi(int tag, long long value, unsigned int user_enum, void ompi_sw_event_print_all(void); /* MPI_T utility functions */ - static int ompi_sw_event_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count); long long ompi_sw_event_get_counter(int counter_id); -static int ompi_sw_event_get_send(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle); - -/* Functions for the PAPI sde component */ -void ompi_sde_init(void); -/* PAPI sde component interface functions */ -typedef void* papi_handle_t; - -/* This should be defined at build time through an MCA parameter */ -#define SOFTWARE_EVENTS_ENABLE -/* Macros for using the utility functions throughout the codebase. - * If SOFTWARE_EVENTS_ENABLE is not defined, the macros become no-ops. +/* Macros for using the software event utility functions throughout the codebase. + * If SOFTWARE_EVENTS_ENABLE is not 1, the macros become no-ops. */ -#ifdef SOFTWARE_EVENTS_ENABLE +#if SOFTWARE_EVENTS_ENABLE == 1 #define SW_EVENT_INIT() \ ompi_sw_event_init() diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 5716e142523..399b4ac6da4 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -141,6 +141,14 @@ OMPI_DECLSPEC extern bool ompi_async_mpi_init; /* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */ OMPI_DECLSPEC extern bool ompi_async_mpi_finalize; +/** + * Whether or not to print the MCA parameters to a file or to stdout + * + * If this argument is set then it is used when parameters are dumped + * when the mpi_show_mca_params is set. + */ +OMPI_DECLSPEC extern char * ompi_mpi_spc_enable_string; + /** * Register MCA parameters used by the MPI layer.