From bb0dff12b59fa6ef2b11fb2f348949829badd8f1 Mon Sep 17 00:00:00 2001 From: Khang D Nguyen Date: Thu, 28 Aug 2025 13:09:14 +0700 Subject: [PATCH] mctpd: Send Discovery Notify on Endpoint role set This commit adds support for Discovery Notify messages, specified in DSP0236 section 12.14. In our implementation, a Discovery Notify message is sent when mctpd sets an interface role from Unknown to Endpoint. To avoid notify discovery messages getting lost, retry the messages for a few time with delays. Signed-off-by: Khang D Nguyen --- src/mctpd.c | 148 ++++++++++++++++++++++++++++++++--- tests/test_mctpd_endpoint.py | 97 ++++++++++++++++++++++- 2 files changed, 231 insertions(+), 14 deletions(-) diff --git a/src/mctpd.c b/src/mctpd.c index 43994f11..ede69bcb 100644 --- a/src/mctpd.c +++ b/src/mctpd.c @@ -126,7 +126,6 @@ enum discovery_state { }; struct link { - enum discovery_state discovered; bool published; int ifindex; enum endpoint_role role; @@ -135,6 +134,14 @@ struct link { sd_bus_slot *slot_iface; sd_bus_slot *slot_busowner; + struct { + enum discovery_state flag; + sd_event_source *notify_source; + dest_phys notify_dest; + uint64_t notify_retry_delay; + uint8_t notify_tries_left; + } discovery; + struct ctx *ctx; }; @@ -805,8 +812,8 @@ static int handle_control_set_endpoint_id(struct ctx *ctx, int sd, warnx("ERR: cannot add bus owner to object lists"); } - if (link_data->discovered != DISCOVERY_UNSUPPORTED) { - link_data->discovered = DISCOVERY_DISCOVERED; + if (link_data->discovery.flag != DISCOVERY_UNSUPPORTED) { + link_data->discovery.flag = DISCOVERY_DISCOVERED; } resp->status = SET_MCTP_EID_ASSIGNMENT_STATUS(MCTP_SET_EID_ACCEPTED) | @@ -817,13 +824,13 @@ static int handle_control_set_endpoint_id(struct ctx *ctx, int sd, return reply_message(ctx, sd, resp, resp_len, addr); case MCTP_SET_EID_DISCOVERED: - if (link_data->discovered == DISCOVERY_UNSUPPORTED) { + if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) { resp->completion_code = MCTP_CTRL_CC_ERROR_INVALID_DATA; resp_len = sizeof(struct mctp_ctrl_resp); return reply_message(ctx, sd, resp, resp_len, addr); } - link_data->discovered = DISCOVERY_DISCOVERED; + link_data->discovery.flag = DISCOVERY_DISCOVERED; resp->status = SET_MCTP_EID_ASSIGNMENT_STATUS(MCTP_SET_EID_REJECTED) | SET_MCTP_EID_ALLOCATION_STATUS(MCTP_SET_EID_POOL_NONE); @@ -1061,7 +1068,7 @@ static int handle_control_prepare_endpoint_discovery( resp = (void *)resp; mctp_ctrl_msg_hdr_init_resp(&resp->ctrl_hdr, *req); - if (link_data->discovered == DISCOVERY_UNSUPPORTED) { + if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) { warnx("received prepare for discovery request to unsupported interface %d", addr->smctp_ifindex); resp->completion_code = MCTP_CTRL_CC_ERROR_UNSUPPORTED_CMD; @@ -1069,8 +1076,8 @@ static int handle_control_prepare_endpoint_discovery( sizeof(struct mctp_ctrl_resp), addr); } - if (link_data->discovered == DISCOVERY_DISCOVERED) { - link_data->discovered = DISCOVERY_UNDISCOVERED; + if (link_data->discovery.flag == DISCOVERY_DISCOVERED) { + link_data->discovery.flag = DISCOVERY_UNDISCOVERED; warnx("clear discovered flag of interface %d", addr->smctp_ifindex); } @@ -1105,13 +1112,13 @@ handle_control_endpoint_discovery(struct ctx *ctx, int sd, return 0; } - if (link_data->discovered == DISCOVERY_UNSUPPORTED) { + if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) { resp->completion_code = MCTP_CTRL_CC_ERROR_INVALID_DATA; return reply_message(ctx, sd, resp, sizeof(struct mctp_ctrl_resp), addr); } - if (link_data->discovered == DISCOVERY_DISCOVERED) { + if (link_data->discovery.flag == DISCOVERY_DISCOVERED) { // if we are already discovered (i.e, assigned an EID), then no reply return 0; } @@ -3659,6 +3666,88 @@ static int bus_link_get_prop(sd_bus *bus, const char *path, return rc; } +static int query_discovery_notify(struct link *link) +{ + struct mctp_ctrl_cmd_discovery_notify req = { 0 }; + struct mctp_ctrl_resp_discovery_notify *resp; + struct sockaddr_mctp_ext resp_addr; + size_t buf_size; + uint8_t *buf; + int rc; + + mctp_ctrl_msg_hdr_init_req(&req.ctrl_hdr, mctp_next_iid(link->ctx), + MCTP_CTRL_CMD_DISCOVERY_NOTIFY); + + rc = endpoint_query_phys(link->ctx, &link->discovery.notify_dest, + MCTP_CTRL_HDR_MSG_TYPE, &req, sizeof(req), + &buf, &buf_size, &resp_addr); + if (rc < 0) + goto free_buf; + + if (buf_size != sizeof(*resp)) { + warnx("%s: wrong reply length %zu bytes. dest %s", __func__, + buf_size, dest_phys_tostr(&link->discovery.notify_dest)); + rc = -ENOMSG; + goto free_buf; + } + + resp = (void *)buf; + if (resp->completion_code != 0) { + warnx("Failure completion code 0x%02x from %s", + resp->completion_code, + dest_phys_tostr(&link->discovery.notify_dest)); + rc = -ECONNREFUSED; + goto free_buf; + } + +free_buf: + free(buf); + return rc; +} + +static int link_discovery_notify_callback(sd_event_source *source, + uint64_t time, void *userdata) +{ + struct link *link = userdata; + struct ctx *ctx = link->ctx; + int rc; + + // sanity check + assert(link->discovery.notify_source == source); + + // Discovery notify succeeded + if (link->discovery.flag == DISCOVERY_DISCOVERED) + goto disarm; + + rc = query_discovery_notify(link); + if (rc < 0) { + if (ctx->verbose) { + warnx("failed to send discovery notify at retry %d: %s", + link->discovery.notify_tries_left, strerror(-rc)); + } + } + + link->discovery.notify_tries_left -= 1; + if (link->discovery.notify_tries_left == 0) { + warnx("failed to send discovery notify after all retries"); + goto disarm; + } + + rc = mctp_ops.sd_event.source_set_time_relative( + source, link->discovery.notify_retry_delay); + if (rc < 0) { + warnx("failed to rearm discovery notify timer"); + goto disarm; + } + + return 0; + +disarm: + sd_event_source_disable_unref(source); + link->discovery.notify_source = NULL; + return 0; +} + static int bus_link_set_prop(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, @@ -4496,7 +4585,7 @@ static int add_interface(struct ctx *ctx, int ifindex) if (!link) return -ENOMEM; - link->discovered = DISCOVERY_UNSUPPORTED; + link->discovery.flag = DISCOVERY_UNSUPPORTED; link->published = false; link->ifindex = ifindex; link->ctx = ctx; @@ -4526,7 +4615,42 @@ static int add_interface(struct ctx *ctx, int ifindex) } if (phys_binding == MCTP_PHYS_BINDING_PCIE_VDM) { - link->discovered = DISCOVERY_UNDISCOVERED; + link->discovery.flag = DISCOVERY_UNDISCOVERED; + // TODO: These numbers are respectively MN1 and MT4, specified in DSP0239 + // control message timing. + // + // Might need to extract these to macros like MCTP_I2C_TSYM_* in this file, + // or a commit to actually centralize those timing at one place, now that + // we have support for detecting link binding type. + link->discovery.notify_tries_left = 3; + link->discovery.notify_retry_delay = 5000000; + + // For PCIe-VDM, we want an all zeroes address for Route-to-Root-Complex. + rc = mctp_nl_hwaddr_len_byindex( + ctx->nl, ifindex, + &link->discovery.notify_dest.hwaddr_len); + if (rc < 0) { + warnx("Can't find hwaddr_len by index %d", ifindex); + return -ENOENT; + } + + memset(link->discovery.notify_dest.hwaddr, 0, + link->discovery.notify_dest.hwaddr_len); + link->discovery.notify_dest.ifindex = ifindex; + + rc = mctp_ops.sd_event.add_time_relative( + ctx->event, &link->discovery.notify_source, + CLOCK_MONOTONIC, 0, 0, link_discovery_notify_callback, + link); + if (rc >= 0) { + rc = sd_event_source_set_enabled( + link->discovery.notify_source, SD_EVENT_ON); + } + if (rc < 0) { + warnx("Failed to arm discovery notify timer"); + sd_event_source_disable_unref( + link->discovery.notify_source); + } } link->published = true; diff --git a/tests/test_mctpd_endpoint.py b/tests/test_mctpd_endpoint.py index 82ecf28c..00f7d9f2 100644 --- a/tests/test_mctpd_endpoint.py +++ b/tests/test_mctpd_endpoint.py @@ -22,11 +22,16 @@ async def iface(): @pytest.fixture -async def sysnet(iface): +async def bo(iface): + return Endpoint(iface, bytes([0x10]), eid=8) + + +@pytest.fixture +async def sysnet(iface, bo): system = System() await system.add_interface(iface) network = Network() - network.add_endpoint(Endpoint(iface, bytes([0x10]), eid=8)) + network.add_endpoint(bo) return Sysnet(system, network) @@ -113,12 +118,100 @@ class TestDiscovery: async def iface(self): return System.Interface("mctp0", 1, 1, bytes([0x1D]), 68, 254, True, PhysicalBinding.PCIE_VDM) + @pytest.fixture + async def bo(self, iface): + return TestDiscovery.BusOwnerEndpoint(iface, bytes([0x00]), eid=8) + + + class BusOwnerEndpoint(Endpoint): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sem = trio.Semaphore(initial_value=0) + + async def handle_mctp_control(self, sock, addr, data): + print(addr, data) + flags, opcode = data[0:2] + if opcode != 0x0D: + return await super().handle_mctp_control(sock, addr, data) + dst_addr = MCTPSockAddr.for_ep_resp(self, addr, sock.addr_ext) + await sock.send(dst_addr, bytes([flags & 0x1F, opcode, 0x00])) + self.sem.release() + + """ Test simple Discovery sequence """ async def test_simple_discovery_sequence(self, dbus, mctpd): bo = mctpd.network.endpoints[0] assert len(mctpd.system.addresses) == 0 + # BMC should send a Discovery Notify message + with trio.move_on_after(5) as expected: + await bo.sem.acquire() + assert not expected.cancelled_caught + + # no EID yet + rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x02)) + assert rsp.hex(' ') == '00 02 00 00 02 00' + + # BMC response to Prepare for Discovery + rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x0B)) + assert rsp.hex(' ') == '00 0b 00' + + # BMC response to Endpoint Discovery + rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x0C)) + assert rsp.hex(' ') == '00 0c 00' + + # set EID = 42 + eid = 42 + rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x01, bytes([0x00, eid]))) + assert rsp.hex(' ') == f'00 01 00 00 {eid:02x} 00' + + # BMC should contains two object paths: bus owner and itself + assert await mctpd_mctp_endpoint_control_obj(dbus, f"/au/com/codeconstruct/mctp1/networks/1/endpoints/{bo.eid}") + assert await mctpd_mctp_endpoint_control_obj(dbus, f"/au/com/codeconstruct/mctp1/networks/1/endpoints/{eid}") + + +class TestDiscoveryRetry: + @pytest.fixture + async def iface(self): + return System.Interface("mctp0", 1, 1, bytes([0x1D]), 68, 254, True, PhysicalBinding.PCIE_VDM) + + @pytest.fixture + async def bo(self, iface): + return TestDiscoveryRetry.BusOwnerEndpoint(iface, bytes([0x00]), eid=8) + + + class BusOwnerEndpoint(Endpoint): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sem = trio.Semaphore(initial_value=0) + self.retry_left = 1 + + async def handle_mctp_control(self, sock, src_addr, msg): + flags, opcode = msg[0:2] + if opcode != 0x0D: + return await super().handle_mctp_control(sock, src_addr, msg) + + # only reply after 2 retries + if self.retry_left == 0: + dst_addr = MCTPSockAddr.for_ep_resp(self, src_addr, sock.addr_ext) + await sock.send(dst_addr, bytes([flags & 0x1F, opcode, 0x00])) + self.sem.release() + else: + self.retry_left -= 1 + + + """ Test simple Discovery sequence """ + async def test_discovery_after_one_retry(self, dbus, mctpd, autojump_clock): + bo = mctpd.network.endpoints[0] + + assert len(mctpd.system.addresses) == 0 + + # BMC should send a Discovery Notify message + with trio.move_on_after(10) as expected: + await bo.sem.acquire() + assert not expected.cancelled_caught + # no EID yet rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x02)) assert rsp.hex(' ') == '00 02 00 00 02 00'