diff --git a/conf/mctpd.conf b/conf/mctpd.conf index 3b7f9097..d505d115 100644 --- a/conf/mctpd.conf +++ b/conf/mctpd.conf @@ -16,3 +16,7 @@ message_timeout_ms = 30 dynamic_eid_range = [8, 254] max_pool_size = 15 + +# Bus Owner/bridge polling interval (ms) to check endpoint accessibility. +# A value of 0 disables polling. +endpoint_poll_ms = 0 diff --git a/docs/mctpd.md b/docs/mctpd.md index e54f56c0..4e138dc6 100644 --- a/docs/mctpd.md +++ b/docs/mctpd.md @@ -276,6 +276,8 @@ busctl call au.com.codeconstruct.MCTP1 \ ### `.Remove` Removes the MCTP endpoint from `mctpd`, and deletes routes and neighbour entries. +If the endpoint is a bridge (has an EID pool allocated for downstream devices), removing +it will cause removal of all downstream devices' endpoint objects as well. ### MCTP bridge interface: `au.com.codeconstruct.MCTP.Bridge1` interface @@ -377,3 +379,15 @@ allocations. This setting determines the maximum EID pool size that a bridge peer may request via their Set Endpoint ID response. Requests larger than this size will be truncated. + +#### `endpoint_poll_ms`: Periodic polling interval for bridged peers. + +* type: integer, in milliseconds +* default: 0 + +This is the periodic polling interval, in milliseconds, at which the bus owner/bridge +checks which bridged EIDs among the allocated pool +space are accessible. The value should be between [```0.5 * TRECLAIM (5)``` - ```10```] seconds. +Such periodic polling is common for all the bridged endpoints among the allocated +pool space [`.PoolStart` - `.PoolEnd`] of the bridge. +Polling can be disabled by setting the value to ```0```. 
diff --git a/src/mctpd.c b/src/mctpd.c index 754ff4bd..e03879c2 100644 --- a/src/mctpd.c +++ b/src/mctpd.c @@ -59,6 +59,8 @@ static const char *mctpd_appid = "67369c05-4b97-4b7e-be72-65cfd8639f10"; static const char *conf_file_default = MCTPD_CONF_FILE_DEFAULT; +static const uint64_t max_poll_interval_ms = 10000; +static const uint64_t min_poll_interval_ms = 2500; static const mctp_eid_t eid_alloc_min = 0x08; static const mctp_eid_t eid_alloc_max = 0xfe; @@ -101,6 +103,12 @@ struct role { const char *dbus_val; }; +// Endpoint poll context for bridged endpoint polling +struct ep_poll_ctx { + struct peer *bridge; + mctp_eid_t poll_eid; +}; + static const struct role roles[] = { [ENDPOINT_ROLE_UNKNOWN] = { .role = ENDPOINT_ROLE_UNKNOWN, @@ -199,6 +207,10 @@ struct peer { // Pool size uint8_t pool_size; uint8_t pool_start; + + struct { + sd_event_source **sources; + } bridge_ep_poll; }; struct msg_type_support { @@ -266,6 +278,10 @@ struct ctx { // maximum pool size for assumed MCTP Bridge uint8_t max_pool_size; + + // bus owner/bridge polling interval in usecs for + // checking endpoint's accessibility. 
+ uint64_t endpoint_poll; }; static int emit_endpoint_added(const struct peer *peer); @@ -280,6 +296,7 @@ static int add_peer_from_addr(struct ctx *ctx, const struct sockaddr_mctp_ext *addr, struct peer **ret_peer); static int remove_peer(struct peer *peer); +static int remove_bridged_peers(struct peer *bridge); static int query_peer_properties(struct peer *peer); static int setup_added_peer(struct peer *peer); static void add_peer_route(struct peer *peer); @@ -1959,6 +1976,55 @@ static int check_peer_struct(const struct peer *peer, const struct net *n) return 0; } +static int remove_bridged_peers(struct peer *bridge) +{ + mctp_eid_t ep, pool_start, pool_end; + struct ep_poll_ctx *pctx = NULL; + struct peer *peer = NULL; + struct net *n = NULL; + int rc = 0; + + sd_event_source **sources = bridge->bridge_ep_poll.sources; + pool_end = bridge->pool_start + bridge->pool_size - 1; + n = lookup_net(bridge->ctx, bridge->net); + pool_start = bridge->pool_start; + + if (!sources) + return 0; + + for (ep = pool_start; ep <= pool_end; ep++) { + // stop endpoint polling before removing peer + // else next trigger will create peer again. 
+ int idx = ep - pool_start; + + if (sources[idx]) { + pctx = sd_event_source_get_userdata(sources[idx]); + rc = sd_event_source_set_enabled(sources[idx], + SD_EVENT_OFF); + if (rc < 0) { + warnx("Failed to stop polling timer while removing peer %d: %s", + ep, strerror(-rc)); + } + + sd_event_source_unref(sources[idx]); + sources[idx] = NULL; + free(pctx); + } + peer = n->peers[ep]; + if (!peer) + continue; + + rc = remove_peer(peer); + if (rc < 0) { + warnx("Failed to remove peer %d from bridge eid %d pool [%d - %d]: %s", + ep, bridge->eid, pool_start, pool_end, + strerror(-rc)); + } + } + + return 0; +} + static int remove_peer(struct peer *peer) { struct ctx *ctx = peer->ctx; @@ -1993,6 +2059,12 @@ static int remove_peer(struct peer *peer) sd_event_source_unref(peer->recovery.source); } + if (peer->pool_size) { + remove_bridged_peers(peer); + free(peer->bridge_ep_poll.sources); + peer->bridge_ep_poll.sources = NULL; + } + n->peers[peer->eid] = NULL; free(peer->message_types); free(peer->uuid); @@ -2039,6 +2111,7 @@ static void free_peers(struct ctx *ctx) free(peer->message_types); free(peer->uuid); free(peer->path); + free(peer->bridge_ep_poll.sources); sd_bus_slot_unref(peer->slot_obmc_endpoint); sd_bus_slot_unref(peer->slot_cc_endpoint); sd_bus_slot_unref(peer->slot_bridge); @@ -2368,8 +2441,10 @@ static int query_get_endpoint_id(struct ctx *ctx, const dest_phys *dest, resp = (void *)buf; *ret_eid = resp->eid; - *ret_ep_type = resp->eid_type; - *ret_media_spec = resp->medium_data; + if (ret_ep_type) + *ret_ep_type = resp->eid_type; + if (ret_media_spec) + *ret_media_spec = resp->medium_data; out: free(buf); return rc; @@ -4992,6 +5067,18 @@ static int parse_config_bus_owner(struct ctx *ctx, toml_table_t *bus_owner) return rc; } + val = toml_int_in(bus_owner, "endpoint_poll_ms"); + if (val.ok && val.u.i) { + uint64_t i = val.u.i; + if ((i > max_poll_interval_ms) || (i < min_poll_interval_ms)) { + warnx("endpoint polling interval invalid (%lu - %lu ms)", + 
min_poll_interval_ms, max_poll_interval_ms); + return -1; + } + + ctx->endpoint_poll = i * 1000; + } + return 0; } @@ -5096,6 +5183,7 @@ static void setup_config_defaults(struct ctx *ctx) ctx->max_pool_size = 15; ctx->dyn_eid_min = eid_alloc_min; ctx->dyn_eid_max = eid_alloc_max; + ctx->endpoint_poll = 0; } static void free_config(struct ctx *ctx) @@ -5186,6 +5274,161 @@ static int endpoint_send_allocate_endpoint_ids( return rc; } +/* DSP0236 section 8.17.6 Reclaiming EIDs from hot-plug devices + * + * The bus owner/bridge can detect a removed device or devices by + * validating the EIDs that are presently allocated to endpoints that + * are directly on the bus and identifying which EIDs are missing. + * It can do this by attempting to access each endpoint that the bridge + * has listed in its routing table as being a device that is directly on + * the particular bus. Attempting to access each endpoint can be accomplished + * by issuing the Get Endpoint ID command... + + + * since bridged endpoints are routed from bridge, direct query + * to eid should work if gateway routes are in place. 
+ */ + +static int peer_reschedule_poll(sd_event_source *source, uint64_t usec) +{ + int rc = 0; + rc = mctp_ops.sd_event.source_set_time_relative(source, usec); + if (rc >= 0) { + rc = sd_event_source_set_enabled(source, SD_EVENT_ONESHOT); + } + + return 0; +} + +static int peer_endpoint_poll(sd_event_source *s, uint64_t usec, void *userdata) +{ + struct ep_poll_ctx *pctx = userdata; + struct peer *bridge = pctx->bridge; + sd_event_source *source = NULL; + mctp_eid_t ep = pctx->poll_eid; + mctp_eid_t pool_start, idx; + struct peer *peer = NULL; + mctp_eid_t ret_eid = 0; + struct net *n; + int rc = 0; + + if (!bridge) { + free(pctx); + return 0; + } + + pool_start = bridge->pool_start; + idx = ep - pool_start; + source = bridge->bridge_ep_poll.sources[idx]; + + /* Polling policy : + * + * Once bridge eid pool space is allocated and gateway + * routes for downstream endpoints are in place, the bus owner + * would initiate a periodic GET_ENDPOINT_ID command at an + * interval of at least 1/2 * TRECLAIM. + + 1. If the downstream endpoint is present behind the bridge and + responds to the poll command, that endpoint path is + considered accessible. + The endpoint path would be published as reachable to d-bus and + polling will no longer continue. + + 2. If the endpoint is not present or doesn't respond to the poll + command, then it has not yet been established whether the endpoint + path from the bridge is accessible or not, thus continue + to poll. 
+ */ + + n = lookup_net(bridge->ctx, bridge->net); + peer = n->peers[ep]; + if (!peer) { + rc = add_peer(bridge->ctx, &(bridge->phys), ep, bridge->net, + &peer, true); + if (rc < 0) + goto exit; + } + + rc = query_get_endpoint_id(bridge->ctx, &(bridge->phys), &ret_eid, NULL, + NULL, peer); + if (rc < 0) { + if (rc == -ETIMEDOUT) { + peer_reschedule_poll(source, + bridge->ctx->endpoint_poll); + return 0; + } + goto exit; + } + + if (ret_eid != ep) { + warnx("Unexpected eid %d abort polling for eid %d", ret_eid, + ep); + goto exit; + } + + if (bridge->ctx->verbose) { + fprintf(stderr, "Endpoint %d is accessible\n", ep); + } + + rc = setup_added_peer(peer); + if (rc < 0) { + peer_reschedule_poll(source, bridge->ctx->endpoint_poll); + return 0; + } + +exit: + assert(sd_event_source_get_enabled(source, NULL) == 0); + sd_event_source_unref(source); + bridge->bridge_ep_poll.sources[idx] = NULL; + free(pctx); + return rc; +} + +static int bridge_poll_start(struct peer *bridge) +{ + mctp_eid_t pool_start = bridge->pool_start; + mctp_eid_t pool_size = bridge->pool_size; + sd_event_source **sources = NULL; + struct ctx *ctx; + int rc; + int i; + + sources = calloc(pool_size, sizeof(*sources)); + bridge->bridge_ep_poll.sources = sources; + ctx = bridge->ctx; + + if (!sources) { + rc = -ENOMEM; + warn("Failed to setup periodic polling for bridge (eid %d)", + bridge->eid); + return rc; + } + + for (i = 0; i < pool_size; i++) { + struct ep_poll_ctx *pctx = calloc(1, sizeof(*pctx)); + if (!pctx) { + warnx("Failed to allocate memory, skip polling for eid %d", + pool_start + i); + continue; + } + + pctx->bridge = bridge; + pctx->poll_eid = pool_start + i; + rc = mctp_ops.sd_event.add_time_relative( + ctx->event, &bridge->bridge_ep_poll.sources[i], + CLOCK_MONOTONIC, ctx->endpoint_poll, 0, + peer_endpoint_poll, pctx); + if (rc < 0) { + warnx("Failed to setup poll event source for eid %d", + (pool_start + i)); + free(pctx); + continue; + } + } + + return 0; +} + static int 
endpoint_allocate_eids(struct peer *peer) { uint8_t allocated_pool_size = 0; @@ -5254,7 +5497,10 @@ static int endpoint_allocate_eids(struct peer *peer) peer->pool_size); } - // TODO: Polling logic for downstream EID + // Poll for downstream endpoint accessibility + if (peer->ctx->endpoint_poll) { + bridge_poll_start(peer); + } return 0; } diff --git a/tests/mctpenv/__init__.py b/tests/mctpenv/__init__.py index cdf61955..6361c4ad 100644 --- a/tests/mctpenv/__init__.py +++ b/tests/mctpenv/__init__.py @@ -259,11 +259,23 @@ def find_endpoint(self, addr): route = self.lookup_route(addr.net, addr.eid) if route is None: return None - iface = route.iface - - neigh = self.lookup_neighbour(route.iface, addr.eid) - # if no neighbour, return an empty lladdr (eg mctpusb) - lladdr = neigh.lladdr if neigh else bytes() + if route.gw is not None: + # In case of gateway routes, we need not have neighbours + # for the gated endpoints, but only need to find the + # gateway's physical address + # TODO: handle recursive gateway and alternate routes + # for downstream endpoints + gw_net, gw_eid = route.gw + gw_route = self.lookup_route(gw_net, gw_eid) + if gw_route is None or gw_route.iface is None: + return None + iface = gw_route.iface + neigh = self.lookup_neighbour(gw_route.iface, gw_eid) + lladdr = neigh.lladdr if neigh else bytes() + else: + iface = route.iface + neigh = self.lookup_neighbour(route.iface, addr.eid) + lladdr = neigh.lladdr if neigh else bytes() if iface is None or lladdr is None: return None diff --git a/tests/test_mctpd.py b/tests/test_mctpd.py index 04d19b83..80aa147d 100644 --- a/tests/test_mctpd.py +++ b/tests/test_mctpd.py @@ -1376,3 +1376,207 @@ async def test_register_vdm_type_support_errors(dbus, mctpd): with pytest.raises(asyncdbus.errors.DBusError) as ex: await mctp.call_register_vdm_type_support(0x00, v_type, 0x0001) assert str(ex.value) == "VDM type already registered" + +""" Test that we use endpoint poll interval from the config and + that we discover 
bridged endpoints via polling""" +async def test_bridged_endpoint_poll(dbus, sysnet, nursery, autojump_clock): + poll_interval = 2500 + config = f""" + [bus-owner] + endpoint_poll_ms = {poll_interval} + """ + + mctpd = MctpdWrapper(dbus, sysnet, config = config) + await mctpd.start_mctpd(nursery) + + iface = mctpd.system.interfaces[0] + ep = mctpd.network.endpoints[0] + mctp = await mctpd_mctp_iface_obj(dbus, iface) + + bridged_ep = [ + Endpoint(iface, bytes(), types = [0, 1]), + Endpoint(iface, bytes(), types = [0, 1]) + ] + for bep in bridged_ep: + mctpd.network.add_endpoint(bep) + ep.add_bridged_ep(bep) + + (eid, _, path, new) = await mctp.call_assign_endpoint(ep.lladdr) + assert new + + mctp_obj = await dbus.get_proxy_object(MCTPD_C, MCTPD_MCTP_P) + mctp_objmgr = await mctp_obj.get_interface(DBUS_OBJECT_MANAGER_I) + endpoint_added = trio.Semaphore(initial_value=0) + + # We expect two bridged endpoints to be discovered + expected_bridged_eps = len(bridged_ep) + bridged_endpoints_found = [] + + def ep_added(ep_path, content): + if MCTPD_ENDPOINT_I in content: + bridged_endpoints_found.append(ep_path) + endpoint_added.release() + + await mctp_objmgr.on_interfaces_added(ep_added) + + # Wait for all expected bridged endpoints to be discovered + with trio.move_on_after(poll_interval / 1000 * 2) as expected: + for i in range(expected_bridged_eps): + await endpoint_added.acquire() + + # Verify we found all expected bridged endpoints + assert not expected.cancelled_caught, "Timeout waiting for bridged endpoints" + assert len(bridged_endpoints_found) == expected_bridged_eps + + res = await mctpd.stop_mctpd() + assert res == 0 + +""" Test that all downstream endpoints are removed when the bridge + endpoint is removed""" +async def test_bridged_endpoint_remove(dbus, sysnet, nursery, autojump_clock): + poll_interval = 2500 + config = f""" + [bus-owner] + endpoint_poll_ms = {poll_interval} + """ + + mctpd = MctpdWrapper(dbus, sysnet, config = config) + await 
mctpd.start_mctpd(nursery) + + iface = mctpd.system.interfaces[0] + ep = mctpd.network.endpoints[0] + mctp = await mctpd_mctp_iface_obj(dbus, iface) + + bridged_ep = [ + Endpoint(iface, bytes(), types = [0, 1]), + Endpoint(iface, bytes(), types = [0, 1]) + ] + for bep in bridged_ep: + mctpd.network.add_endpoint(bep) + ep.add_bridged_ep(bep) + + (eid, _, path, new) = await mctp.call_assign_endpoint(ep.lladdr) + assert new + + # Wait for the bridged endpoints to be discovered + await trio.sleep(poll_interval / 1000) + removed = trio.Semaphore(initial_value = 0) + removed_eps = [] + + # Capture the removed endpoints + def ep_removed(ep_path, interfaces): + if MCTPD_ENDPOINT_I in interfaces: + removed.release() + removed_eps.append(ep_path) + + mctp_obj = await dbus.get_proxy_object(MCTPD_C, MCTPD_MCTP_P) + mctp_objmgr = await mctp_obj.get_interface(DBUS_OBJECT_MANAGER_I) + await mctp_objmgr.on_interfaces_removed(ep_removed) + + # Remove the bridge endpoint + bridge_obj = await mctpd_mctp_endpoint_control_obj(dbus, path) + await bridge_obj.call_remove() + + # Assert that all downstream endpoints were removed + assert len(removed_eps) == (len(bridged_ep) + 1) + res = await mctpd.stop_mctpd() + assert res == 0 + +""" Test that polling stops once the endpoint has been discovered """ +async def test_bridged_endpoint_poll_stop(dbus, sysnet, nursery, autojump_clock): + poll_interval = 2500 + config = f""" + [bus-owner] + endpoint_poll_ms = {poll_interval} + """ + + mctpd = MctpdWrapper(dbus, sysnet, config = config) + await mctpd.start_mctpd(nursery) + + iface = mctpd.system.interfaces[0] + ep = mctpd.network.endpoints[0] + mctp = await mctpd_mctp_iface_obj(dbus, iface) + poll_count = 0 + + class BridgedEndpoint(Endpoint): + async def handle_mctp_control(self, sock, src_addr, msg): + flags, opcode = msg[0:2] + if opcode == 0x2: # Get Endpoint ID + nonlocal poll_count + poll_count += 1 + return await super().handle_mctp_control(sock, src_addr, msg) + + bridged_ep = 
BridgedEndpoint(iface, bytes(), types = [0, 1]) + mctpd.network.add_endpoint(bridged_ep) + ep.add_bridged_ep(bridged_ep) + + (eid, _, path, new) = await mctp.call_assign_endpoint(ep.lladdr) + assert new + + mctp_obj = await dbus.get_proxy_object(MCTPD_C, MCTPD_MCTP_P) + mctp_objmgr = await mctp_obj.get_interface(DBUS_OBJECT_MANAGER_I) + endpoint_added = trio.Semaphore(initial_value=0) + poll_count_by_discovery = 0 + + def ep_added(ep_path, content): + if MCTPD_ENDPOINT_I in content: + nonlocal poll_count_by_discovery + poll_count_by_discovery = poll_count + endpoint_added.release() + + await mctp_objmgr.on_interfaces_added(ep_added) + + # Wait longer than the poll interval for the bridged endpoint + # to be discovered + await trio.sleep(poll_interval / 1000) + + # We should have only polled until the discovery, thus the count should + # be the same even after a longer wait. + assert poll_count == poll_count_by_discovery + + res = await mctpd.stop_mctpd() + assert res == 0 + +""" Test that polling continues until the endpoint is discovered """ +async def test_bridged_endpoint_poll_continue(dbus, sysnet, nursery, autojump_clock): + poll_interval = 2500 + config = f""" + [bus-owner] + endpoint_poll_ms = {poll_interval} + """ + + mctpd = MctpdWrapper(dbus, sysnet, config = config) + await mctpd.start_mctpd(nursery) + + iface = mctpd.system.interfaces[0] + ep = mctpd.network.endpoints[0] + mctp = await mctpd_mctp_iface_obj(dbus, iface) + poll_count = 0 + + class BridgedEndpoint(Endpoint): + async def handle_mctp_control(self, sock, src_addr, msg): + flags, opcode = msg[0:2] + # don't respond, to simulate the device not being accessible, + # but increment the poll count for Get Endpoint ID + if opcode == 0x2: # Get Endpoint ID + nonlocal poll_count + poll_count += 1 + return None + + bridged_ep = BridgedEndpoint(iface, bytes(), types = [0, 1]) + mctpd.network.add_endpoint(bridged_ep) + ep.add_bridged_ep(bridged_ep) + + (eid, _, path, new) = await mctp.call_assign_endpoint(ep.lladdr) + assert 
new + + # Wait for sometime to continue polling + await trio.sleep(poll_interval / 1000) + + poll_count_before = poll_count + # Wait more to see if poll count increments + await trio.sleep(1) + assert poll_count > poll_count_before + + res = await mctpd.stop_mctpd() + assert res == 0