From 350b494a7190e65da0b706c29b6f4615b50fe0c0 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Thu, 8 Jan 2026 15:39:02 +0100 Subject: [PATCH 01/16] iface: return an error if vrf_id exceeds limits The vrf_id field was not validated against GR_MAX_VRFS before interface creation. This could lead to out-of-bounds array accesses in the RIB statistics or other VRF-indexed structures. Return EOVERFLOW early to reject invalid VRF identifiers. Signed-off-by: Robin Jarry --- modules/infra/control/iface.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/infra/control/iface.c b/modules/infra/control/iface.c index af08ba1ff..66909a33a 100644 --- a/modules/infra/control/iface.c +++ b/modules/infra/control/iface.c @@ -90,6 +90,10 @@ struct iface *iface_create(const struct gr_iface *conf, const void *api_info) { goto fail; } } + if (conf->vrf_id >= GR_MAX_VRFS) { + errno = EOVERFLOW; + goto fail; + } iface = rte_zmalloc(__func__, sizeof(*iface) + type->priv_size, RTE_CACHE_LINE_SIZE); if (iface == NULL) { From ce2e52416d56d1132a1bc0b7132a27ce4caccdf5 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Thu, 8 Jan 2026 15:40:13 +0100 Subject: [PATCH 02/16] nexthop: return a name for unspec origin The empty string was filtered out by telemetry handlers and would not produce valid dictionary keys. Return "UNSPEC" to make the origin visible in telemetry output. 
Signed-off-by: Robin Jarry --- modules/infra/api/gr_nexthop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index 17774055c..04dcfc787 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -192,7 +192,7 @@ static inline const char *gr_nh_type_name(const gr_nh_type_t type) { static inline const char *gr_nh_origin_name(gr_nh_origin_t origin) { switch (origin) { case GR_NH_ORIGIN_UNSPEC: - return ""; + return "UNSPEC"; case GR_NH_ORIGIN_REDIRECT: return "redirect"; case GR_NH_ORIGIN_LINK: From 0baf9756f06de4a7232b9da776862991e4634ae7 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Tue, 6 Jan 2026 18:16:05 +0100 Subject: [PATCH 03/16] bond: fix mode/algo name helpers These functions must return const strings. Fixes: afef63ae7c60 ("iface: add bonding support with active-backup mode") Fixes: f2b09727b0b8 ("bond: add lacp support") Signed-off-by: Robin Jarry --- modules/infra/api/gr_infra.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index 045b5c787..1b8eb0a59 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -130,7 +130,7 @@ typedef enum : uint8_t { GR_BOND_MODE_LACP, } gr_bond_mode_t; -static inline char *gr_bond_mode_name(gr_bond_mode_t mode) { +static inline const char *gr_bond_mode_name(gr_bond_mode_t mode) { switch (mode) { case GR_BOND_MODE_ACTIVE_BACKUP: return "active-backup"; @@ -147,7 +147,7 @@ typedef enum : uint8_t { GR_BOND_ALGO_L3_L4, // Toeplitz hash on IP addresses and TCP/UDP ports. 
} gr_bond_algo_t; -static inline char *gr_bond_algo_name(gr_bond_algo_t algo) { +static inline const char *gr_bond_algo_name(gr_bond_algo_t algo) { switch (algo) { case GR_BOND_ALGO_RSS: return "rss"; From 74e8d200e15cd0c6ecd2f90e7f71e5175617fc37 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 14 Jan 2026 17:45:47 +0100 Subject: [PATCH 04/16] nexthop: add validation helpers for enum types Introduce gr_af_valid(), nexthop_type_valid(), and nexthop_origin_valid() functions to centralize validation of address family, nexthop type, and origin enum values. These helpers can now be used both internally and by external callers. Refactor nexthop_af_ops_register() and nexthop_type_ops_register() to use the new helpers instead of inline switch statements. This removes code duplication and improves error messages. Add input validation in nexthop_update() to reject invalid type or origin values with appropriate errno codes before modifying the nexthop. Also add a null pointer check in nexthop_new() for the base and info parameters. Signed-off-by: Robin Jarry --- api/gr_net_types.h | 11 +++ modules/infra/control/gr_nh_control.h | 6 ++ modules/infra/control/nexthop.c | 104 +++++++++++++++++--------- 3 files changed, 86 insertions(+), 35 deletions(-) diff --git a/api/gr_net_types.h b/api/gr_net_types.h index bf857ee95..688da1c2a 100644 --- a/api/gr_net_types.h +++ b/api/gr_net_types.h @@ -42,6 +42,17 @@ static inline const char *gr_af_name(addr_family_t af) { return "?"; } +// Check if address family value is valid. +static inline bool gr_af_valid(addr_family_t af) { + switch (af) { + case GR_AF_UNSPEC: + case GR_AF_IP4: + case GR_AF_IP6: + return true; + } + return false; +} + // Custom printf specifiers for network addresses. 
// struct rte_ether_addr * diff --git a/modules/infra/control/gr_nh_control.h b/modules/infra/control/gr_nh_control.h index a6ecb4a98..50b7a63bb 100644 --- a/modules/infra/control/gr_nh_control.h +++ b/modules/infra/control/gr_nh_control.h @@ -87,6 +87,12 @@ struct nexthop *nexthop_lookup_by_id(uint32_t nh_id); // Compare two nexthops, return True if the same, else False bool nexthop_equal(const struct nexthop *, const struct nexthop *); +// Check if a nexthop type value is valid. +bool nexthop_type_valid(gr_nh_type_t); + +// Check if an origin value is valid. +bool nexthop_origin_valid(gr_nh_origin_t); + // Allocate a new nexthop from the global pool with the provided initial values. struct nexthop *nexthop_new(const struct gr_nexthop_base *, const void *info); diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index d1c09d158..134a880ca 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -290,22 +290,7 @@ int nexthop_config_set(const struct gr_nexthop_config *c) { return 0; } -void nexthop_af_ops_register(addr_family_t af, const struct nexthop_af_ops *ops) { - switch (af) { - case GR_AF_UNSPEC: - case GR_AF_IP4: - case GR_AF_IP6: - if (ops == NULL || ops->cleanup_routes == NULL || ops->solicit == NULL) - ABORT("invalid af ops"); - if (af_ops[af] != NULL) - ABORT("duplicate af ops %hhu", af); - af_ops[af] = ops; - return; - } - ABORT("invalid nexthop family %hhu", af); -} - -void nexthop_type_ops_register(gr_nh_type_t type, const struct nexthop_type_ops *ops) { +bool nexthop_type_valid(gr_nh_type_t type) { switch (type) { case GR_NH_T_L3: case GR_NH_T_SR6_OUTPUT: @@ -314,14 +299,68 @@ void nexthop_type_ops_register(gr_nh_type_t type, const struct nexthop_type_ops case GR_NH_T_BLACKHOLE: case GR_NH_T_REJECT: case GR_NH_T_GROUP: - if (ops == NULL) - ABORT("invalid type ops"); - if (type_ops[type] != NULL) - ABORT("duplicate type ops %hhu", type); - type_ops[type] = ops; - return; + return true; } - 
ABORT("invalid nexthop type %hhu", type); + return false; +} + +bool nexthop_origin_valid(gr_nh_origin_t origin) { + switch (origin) { + case GR_NH_ORIGIN_UNSPEC: + case GR_NH_ORIGIN_REDIRECT: + case GR_NH_ORIGIN_LINK: + case GR_NH_ORIGIN_BOOT: + case GR_NH_ORIGIN_USER: + case GR_NH_ORIGIN_GATED: + case GR_NH_ORIGIN_RA: + case GR_NH_ORIGIN_MRT: + case GR_NH_ORIGIN_ZEBRA: + case GR_NH_ORIGIN_BIRD: + case GR_NH_ORIGIN_DNROUTED: + case GR_NH_ORIGIN_XORP: + case GR_NH_ORIGIN_NTK: + case GR_NH_ORIGIN_DHCP: + case GR_NH_ORIGIN_MROUTED: + case GR_NH_ORIGIN_KEEPALIVED: + case GR_NH_ORIGIN_BABEL: + case GR_NH_ORIGIN_OPENR: + case GR_NH_ORIGIN_BGP: + case GR_NH_ORIGIN_ISIS: + case GR_NH_ORIGIN_OSPF: + case GR_NH_ORIGIN_RIP: + case GR_NH_ORIGIN_RIPNG: + case GR_NH_ORIGIN_NHRP: + case GR_NH_ORIGIN_EIGRP: + case GR_NH_ORIGIN_LDP: + case GR_NH_ORIGIN_SHARP: + case GR_NH_ORIGIN_PBR: + case GR_NH_ORIGIN_ZSTATIC: + case GR_NH_ORIGIN_OPENFABRIC: + case GR_NH_ORIGIN_SRTE: + case GR_NH_ORIGIN_INTERNAL: + return true; + } + return false; +} + +void nexthop_af_ops_register(addr_family_t af, const struct nexthop_af_ops *ops) { + if (!gr_af_valid(af)) + ABORT("invalid af value %hhu", af); + if (ops == NULL || ops->cleanup_routes == NULL || ops->solicit == NULL) + ABORT("invalid af ops"); + if (af_ops[af] != NULL) + ABORT("duplicate af ops %s", gr_af_name(af)); + af_ops[af] = ops; +} + +void nexthop_type_ops_register(gr_nh_type_t type, const struct nexthop_type_ops *ops) { + if (!nexthop_type_valid(type)) + ABORT("invalid type value %hhu", type); + if (ops == NULL) + ABORT("invalid type ops"); + if (type_ops[type] != NULL) + ABORT("duplicate type ops %hhu", type); + type_ops[type] = ops; } struct nexthop *nexthop_new(const struct gr_nexthop_base *base, const void *info) { @@ -329,18 +368,8 @@ struct nexthop *nexthop_new(const struct gr_nexthop_base *base, const void *info void *data; int ret; - switch (base->type) { - case GR_NH_T_L3: - case GR_NH_T_SR6_OUTPUT: - case GR_NH_T_SR6_LOCAL: - 
case GR_NH_T_DNAT: - case GR_NH_T_BLACKHOLE: - case GR_NH_T_REJECT: - case GR_NH_T_GROUP: - break; - default: - ABORT("invalid nexthop type %hhu", base->type); - } + if (base == NULL || info == NULL) + return errno_set_null(EINVAL); if (rte_lcore_has_role(rte_lcore_id(), ROLE_NON_EAL)) ABORT("nexthop created from datapath thread"); @@ -370,6 +399,11 @@ int nexthop_update(struct nexthop *nh, const struct gr_nexthop_base *base, const struct gr_nexthop_base backup = nh->base; int ret; + if (!nexthop_type_valid(base->type)) + return errno_set(ESOCKTNOSUPPORT); + if (!nexthop_origin_valid(base->origin)) + return errno_set(EPFNOSUPPORT); + nexthop_id_put(nh); if (nh->ref_count > 0 && base->type != nh->type) { From 205bea3c2e719ded66d0db1fc6305bd1773b25de Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 14 Jan 2026 19:16:11 +0100 Subject: [PATCH 05/16] api: remove string.h include from gr_net_types.h The string.h header is not used in gr_net_types.h itself. Remove it from the public header and add it explicitly to the CLI source files that actually require it. This reduces unnecessary includes in translation units that only need the network type definitions. 
Signed-off-by: Robin Jarry --- api/gr_net_types.h | 1 - cli/ec_node_dyn.c | 1 + cli/ecoli.c | 1 + cli/exec.c | 1 + cli/interact.c | 1 + modules/infra/cli/affinity.c | 2 ++ modules/infra/cli/bond.c | 1 + modules/infra/cli/iface.c | 1 + modules/infra/cli/nexthop.c | 1 + modules/infra/cli/port.c | 1 + modules/infra/cli/stats.c | 1 + modules/srv6/cli/localsid.c | 1 + 12 files changed, 12 insertions(+), 1 deletion(-) diff --git a/api/gr_net_types.h b/api/gr_net_types.h index 688da1c2a..0aef72e0e 100644 --- a/api/gr_net_types.h +++ b/api/gr_net_types.h @@ -13,7 +13,6 @@ #include #include #include -#include #ifdef __GROUT_MAIN__ #include diff --git a/cli/ec_node_dyn.c b/cli/ec_node_dyn.c index 0050b8a65..19dc4a1cf 100644 --- a/cli/ec_node_dyn.c +++ b/cli/ec_node_dyn.c @@ -8,6 +8,7 @@ #include #include +#include EC_LOG_TYPE_REGISTER(node_dyn); diff --git a/cli/ecoli.c b/cli/ecoli.c index 5363c1790..c7cd6bb51 100644 --- a/cli/ecoli.c +++ b/cli/ecoli.c @@ -7,6 +7,7 @@ #include #include +#include struct ec_node *with_help(const char *help, struct ec_node *node) { if (node == NULL) diff --git a/cli/exec.c b/cli/exec.c index a21f5107a..313a6a2e3 100644 --- a/cli/exec.c +++ b/cli/exec.c @@ -10,6 +10,7 @@ #include #include +#include static STAILQ_HEAD(, cli_context) contexts = STAILQ_HEAD_INITIALIZER(contexts); diff --git a/cli/interact.c b/cli/interact.c index ae69b391b..5894ac5ee 100644 --- a/cli/interact.c +++ b/cli/interact.c @@ -12,6 +12,7 @@ #include #include +#include #include #define __PROMPT "grout#" diff --git a/modules/infra/cli/affinity.c b/modules/infra/cli/affinity.c index 59065bb1e..8fd789b3d 100644 --- a/modules/infra/cli/affinity.c +++ b/modules/infra/cli/affinity.c @@ -11,6 +11,8 @@ #include #include +#include + static cmd_status_t affinity_set(struct gr_api_client *c, const struct ec_pnode *p) { struct gr_infra_cpu_affinity_set_req req = {0}; const char *arg; diff --git a/modules/infra/cli/bond.c b/modules/infra/cli/bond.c index 2233e1fcc..c9f626a1d 100644 --- 
a/modules/infra/cli/bond.c +++ b/modules/infra/cli/bond.c @@ -12,6 +12,7 @@ #include #include +#include #include static void bond_show(struct gr_api_client *c, const struct gr_iface *iface) { diff --git a/modules/infra/cli/iface.c b/modules/infra/cli/iface.c index c1cf79667..4092a9098 100644 --- a/modules/infra/cli/iface.c +++ b/modules/infra/cli/iface.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 8917ee097..35448f3a4 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -15,6 +15,7 @@ #include #include +#include #include static STAILQ_HEAD(, cli_nexthop_formatter) formatters = STAILQ_HEAD_INITIALIZER(formatters); diff --git a/modules/infra/cli/port.c b/modules/infra/cli/port.c index 5d14116a9..5bed6d823 100644 --- a/modules/infra/cli/port.c +++ b/modules/infra/cli/port.c @@ -12,6 +12,7 @@ #include #include +#include #include static void port_show(struct gr_api_client *c, const struct gr_iface *iface) { diff --git a/modules/infra/cli/stats.c b/modules/infra/cli/stats.c index 03ce34e8b..44f7cd07b 100644 --- a/modules/infra/cli/stats.c +++ b/modules/infra/cli/stats.c @@ -11,6 +11,7 @@ #include #include +#include #include static int stats_order_name(const void *sa, const void *sb) { diff --git a/modules/srv6/cli/localsid.c b/modules/srv6/cli/localsid.c index e0a8657da..43678948a 100644 --- a/modules/srv6/cli/localsid.c +++ b/modules/srv6/cli/localsid.c @@ -13,6 +13,7 @@ #include #include +#include static struct { gr_srv6_behavior_t behavior; From cc563d067b6cd81340d17a6a1a711b23b218a834 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Thu, 8 Jan 2026 14:20:48 +0100 Subject: [PATCH 06/16] treewide: remove DPDK telemetry endpoints The DPDK telemetry library requires a python script to export metrics in prometheus format. This adds complexity and an external dependency that is hard to package and maintain. 
Remove all rte_telemetry_register_cmd() calls and their associated handler functions. Disable the DPDK telemetry socket by passing --no-telemetry to rte_eal_init(). Remove the lcore usage callback registration as well. Remove the now unused nexthop and RIB statistics. Drop the grout-prometheus package from both RPM and Debian packaging since it only contained the DPDK telemetry exporter script. A builtin openmetrics exporter will be added in following commits. Signed-off-by: Robin Jarry --- Containerfile.grout | 2 +- GNUmakefile | 2 - debian/control | 20 --- debian/grout-prometheus.install | 2 - debian/rules | 4 - main/dpdk.c | 1 + modules/infra/api/stats.c | 173 -------------------------- modules/infra/control/gr_nh_control.h | 9 -- modules/infra/control/nexthop.c | 36 ------ modules/infra/control/worker.c | 18 --- modules/ip/control/gr_ip4_control.h | 8 -- modules/ip/control/route.c | 67 ---------- modules/ip6/control/gr_ip6_control.h | 8 -- modules/ip6/control/route.c | 72 +---------- rpm/grout.spec | 17 --- 15 files changed, 3 insertions(+), 436 deletions(-) delete mode 100644 debian/grout-prometheus.install diff --git a/Containerfile.grout b/Containerfile.grout index a852037fd..267cb0780 100644 --- a/Containerfile.grout +++ b/Containerfile.grout @@ -2,7 +2,7 @@ # Copyright (c) 2025 Christophe Fontaine FROM registry.access.redhat.com/ubi9 as ubi-builder -COPY grout.*.rpm grout-prometheus.*.rpm /tmp +COPY grout.*.rpm /tmp RUN mkdir -p /tmp/null RUN dnf -y install --nodocs --setopt=install_weak_deps=0 --releasever 9 --installroot /tmp/null /tmp/grout*.rpm RUN dnf -y --installroot /tmp/null clean all diff --git a/GNUmakefile b/GNUmakefile index 5a95b376a..e44c30bc5 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -80,7 +80,6 @@ deb: GROUT_VERSION='$(debversion)' dpkg-buildpackage -b $Q arch=`dpkg-architecture -qDEB_HOST_ARCH` && \ mv -vf ../grout-headers_$(debversion)_all.deb grout-headers_all.deb && \ - mv -vf ../grout-prometheus_$(debversion)_all.deb 
grout-prometheus_all.deb && \ mv -vf ../grout_$(debversion)_$$arch.deb grout_$$arch.deb && \ mv -vf ../grout-dbgsym_$(debversion)_$$arch.deb grout-dbgsym_$$arch.deb && \ mv -vf ../grout-frr_$(debversion)_$$arch.deb grout-frr_$$arch.deb && \ @@ -96,7 +95,6 @@ rpm: $Q arch=`rpm --eval '%{_arch}'` && \ version="$(rpmversion)-$(rpmrelease)" && \ mv -vf ~/rpmbuild/RPMS/noarch/grout-headers-$$version.noarch.rpm grout-headers.noarch.rpm && \ - mv -vf ~/rpmbuild/RPMS/noarch/grout-prometheus-$$version.noarch.rpm grout-prometheus.noarch.rpm && \ mv -vf ~/rpmbuild/RPMS/$$arch/grout-$$version.$$arch.rpm grout.$$arch.rpm && \ mv -vf ~/rpmbuild/RPMS/$$arch/grout-debuginfo-$$version.$$arch.rpm grout-debuginfo.$$arch.rpm && \ mv -vf ~/rpmbuild/RPMS/$$arch/grout-frr-$$version.$$arch.rpm grout-frr.$$arch.rpm && \ diff --git a/debian/control b/debian/control index 8db82f3cb..ef0e6a5a6 100644 --- a/debian/control +++ b/debian/control @@ -71,26 +71,6 @@ Description: API headers for grout clients a CLI that uses that library. The CLI can be used as an interactive shell, but also in scripts one command at a time, or by batches. -Package: grout-prometheus -Architecture: all -Depends: - ${misc:Depends}, - python3, -Description: Prometheus exporter for grout - grout stands for Graph Router. In English, "grout" refers to thin mortar that - hardens to fill gaps between tiles. - . - grout is a DPDK based network processing application. It uses the rte_graph - library for data path processing. - . - Its main purpose is to simulate a network function or a physical router for - testing/replicating real (usually closed source) VNF/CNF behavior with an - opensource tool. - . - It comes with a client library to configure it over a standard UNIX socket and - a CLI that uses that library. The CLI can be used as an interactive shell, but - also in scripts one command at a time, or by batches. 
- Package: grout-frr Architecture: linux-any Depends: diff --git a/debian/grout-prometheus.install b/debian/grout-prometheus.install deleted file mode 100644 index 366fd833a..000000000 --- a/debian/grout-prometheus.install +++ /dev/null @@ -1,2 +0,0 @@ -/usr/bin/grout-telemetry-exporter -/usr/share/dpdk/telemetry-endpoints/* diff --git a/debian/rules b/debian/rules index 51793cdff..e5d6be5b7 100755 --- a/debian/rules +++ b/debian/rules @@ -22,10 +22,6 @@ override_dh_auto_configure: override_dh_auto_install: meson install -C $(build) --skip-subprojects --destdir=$(dest) - install -D -m 0755 subprojects/dpdk/usertools/dpdk-telemetry-exporter.py \ - $(dest)/usr/bin/grout-telemetry-exporter - install -D -m 0644 -t $(dest)/usr/share/dpdk/telemetry-endpoints \ - subprojects/dpdk/usertools/telemetry-endpoints/* override_dh_installsystemd: dh_installsystemd --no-start --no-stop-on-upgrade diff --git a/main/dpdk.c b/main/dpdk.c index b7ed3f231..9a59324e8 100644 --- a/main/dpdk.c +++ b/main/dpdk.c @@ -137,6 +137,7 @@ int dpdk_init(void) { gr_vec_add(eal_args, ""); gr_vec_add(eal_args, "-l"); gr_vec_add(eal_args, main_lcore); + gr_vec_add(eal_args, "--no-telemetry"); #ifdef RTE_BUS_PCI gr_vec_add(eal_args, "-a"); gr_vec_add(eal_args, "pci:0000:00:00.0"); diff --git a/modules/infra/api/stats.c b/modules/infra/api/stats.c index 87433f334..d25ceef11 100644 --- a/modules/infra/api/stats.c +++ b/modules/infra/api/stats.c @@ -12,7 +12,6 @@ #include #include -#include #include @@ -316,168 +315,6 @@ static struct api_out iface_stats_get(const void * /*request*/, struct api_ctx * return api_out(-ret, 0, NULL); } -static int -telemetry_sw_stats_get(const char * /*cmd*/, const char * /*params*/, struct rte_tel_data *d) { - gr_vec struct gr_infra_stat *stats = graph_stats(UINT16_MAX); - struct gr_infra_stat *s; - - rte_tel_data_start_dict(d); - - gr_vec_foreach_ref (s, stats) { - if (s->batches > 0) { - struct rte_tel_data *val = rte_tel_data_alloc(); - if (val == NULL) { - goto err; - } 
- rte_tel_data_start_dict(val); - rte_tel_data_add_dict_uint(val, "packets", s->packets); - rte_tel_data_add_dict_uint(val, "batches", s->batches); - rte_tel_data_add_dict_uint(val, "cycles", s->cycles); - if (rte_tel_data_add_dict_container(d, s->name, val, 0) != 0) { - rte_tel_data_free(val); - goto err; - } - } - } - - gr_vec_free(stats); - return 0; -err: - gr_vec_free(stats); - return -1; -} - -static int -telemetry_ifaces_info_get(const char * /*cmd*/, const char * /*params*/, struct rte_tel_data *d) { - struct iface *iface = NULL; - - rte_tel_data_start_dict(d); - - while ((iface = iface_next(GR_IFACE_TYPE_UNDEF, iface)) != NULL) { - if (iface->type != GR_IFACE_TYPE_LOOPBACK) { - struct rte_tel_data *iface_container = rte_tel_data_alloc(); - if (iface_container == NULL) { - goto err; - } - rte_tel_data_start_dict(iface_container); - - rte_tel_data_add_dict_string(iface_container, "name", iface->name); - rte_tel_data_add_dict_uint(iface_container, "id", iface->id); - rte_tel_data_add_dict_string( - iface_container, "type", gr_iface_type_name(iface->type) - ); - rte_tel_data_add_dict_uint(iface_container, "mtu", iface->mtu); - - struct rte_tel_data *flags_array = rte_tel_data_alloc(); - if (flags_array == NULL) { - rte_tel_data_free(iface_container); - goto err; - } - rte_tel_data_start_array(flags_array, RTE_TEL_STRING_VAL); - if (iface->flags & GR_IFACE_F_UP) - rte_tel_data_add_array_string(flags_array, "up"); - if (iface->state & GR_IFACE_S_RUNNING) - rte_tel_data_add_array_string(flags_array, "running"); - rte_tel_data_add_dict_container(iface_container, "flags", flags_array, 0); - - rte_tel_data_add_dict_string( - iface_container, "mode", gr_iface_mode_name(iface->mode) - ); - rte_tel_data_add_dict_uint(iface_container, "vrf_id", iface->vrf_id); - - struct rte_tel_data *stats_container = rte_tel_data_alloc(); - if (stats_container == NULL) { - rte_tel_data_free(iface_container); - goto err; - } - rte_tel_data_start_dict(stats_container); - - // Software 
stats - uint64_t rx_pkts = 0, rx_bytes = 0, tx_pkts = 0, tx_bytes = 0; - uint64_t cp_rx_pkts = 0, cp_rx_bytes = 0, cp_tx_pkts = 0, cp_tx_bytes = 0; - for (int i = 0; i < RTE_MAX_LCORE; i++) { - struct iface_stats *sw_stats = iface_get_stats(i, iface->id); - rx_pkts += sw_stats->rx_packets; - rx_bytes += sw_stats->rx_bytes; - tx_pkts += sw_stats->tx_packets; - tx_bytes += sw_stats->tx_bytes; - cp_rx_pkts += sw_stats->cp_rx_packets; - cp_rx_bytes += sw_stats->cp_rx_bytes; - cp_tx_pkts += sw_stats->cp_tx_packets; - cp_tx_bytes += sw_stats->cp_tx_bytes; - } - rte_tel_data_add_dict_uint(stats_container, "rx_packets", rx_pkts); - rte_tel_data_add_dict_uint(stats_container, "rx_bytes", rx_bytes); - rte_tel_data_add_dict_uint(stats_container, "tx_packets", tx_pkts); - rte_tel_data_add_dict_uint(stats_container, "tx_bytes", tx_bytes); - rte_tel_data_add_dict_uint(stats_container, "cp_rx_packets", cp_rx_pkts); - rte_tel_data_add_dict_uint(stats_container, "cp_rx_bytes", cp_rx_bytes); - rte_tel_data_add_dict_uint(stats_container, "cp_tx_packets", cp_tx_pkts); - rte_tel_data_add_dict_uint(stats_container, "cp_tx_bytes", cp_tx_bytes); - - // Get hardware stats for physical ports. 
- if (iface->type == GR_IFACE_TYPE_PORT) { - struct iface_info_port *port = iface_info_port(iface); - - struct rte_eth_stats eth_stats; - if (rte_eth_stats_get(port->port_id, &eth_stats) == 0) { - rte_tel_data_add_dict_uint( - stats_container, "rx_missed", eth_stats.imissed - ); - rte_tel_data_add_dict_uint( - stats_container, "tx_errors", eth_stats.oerrors - ); - } - - int ret = rte_eth_xstats_get(port->port_id, NULL, 0); - if (ret > 0) { - unsigned num = ret; - struct rte_eth_xstat *xstats = calloc(num, sizeof(*xstats)); - struct rte_eth_xstat_name *names = calloc( - num, sizeof(*names) - ); - if (xstats != NULL && names != NULL - && rte_eth_xstats_get_names(port->port_id, names, num) - == (int)num - && rte_eth_xstats_get(port->port_id, xstats, num) - == (int)num) { - for (unsigned i = 0; i < num; i++) { - if (xstats[i].value > 0) { - rte_tel_data_add_dict_uint( - stats_container, - names[i].name, - xstats[i].value - ); - } - } - } - free(xstats); - free(names); - } - - if (rte_tel_data_add_dict_container( - iface_container, "statistics", stats_container, 0 - ) - != 0) { - rte_tel_data_free(stats_container); - rte_tel_data_free(iface_container); - goto err; - } - } - - if (rte_tel_data_add_dict_container(d, iface->name, iface_container, 0) - != 0) { - rte_tel_data_free(iface_container); - goto err; - } - } - } - return 0; - -err: - return -1; -} - static struct gr_api_handler stats_get_handler = { .name = "stats get", .request_type = GR_INFRA_STATS_GET, @@ -500,14 +337,4 @@ RTE_INIT(infra_stats_init) { gr_register_api_handler(&stats_get_handler); gr_register_api_handler(&stats_reset_handler); gr_register_api_handler(&iface_stats_get_handler); - rte_telemetry_register_cmd( - "/grout/stats/graph", - telemetry_sw_stats_get, - "Returns statistics of each graph node. No parameters" - ); - rte_telemetry_register_cmd( - "/grout/iface", - telemetry_ifaces_info_get, - "Returns information per interface.
No parameters" - ); } diff --git a/modules/infra/control/gr_nh_control.h b/modules/infra/control/gr_nh_control.h index 50b7a63bb..6950b3030 100644 --- a/modules/infra/control/gr_nh_control.h +++ b/modules/infra/control/gr_nh_control.h @@ -149,14 +149,5 @@ struct nexthop_type_ops { void nexthop_type_ops_register(gr_nh_type_t type, const struct nexthop_type_ops *); -// Nexthop statistics structure -struct nh_stats { - uint32_t total; - uint32_t by_type[UINT_NUM_VALUES(gr_nh_type_t)]; -}; - -// Get nexthop statistics. -const struct nh_stats *nexthop_get_stats(void); - // Local IP address nexthops will have these flags set. #define NH_LOCAL_ADDR_FLAGS (GR_NH_F_LOCAL | GR_NH_F_LINK | GR_NH_F_STATIC) diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index 134a880ca..b3a7102d3 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -15,7 +15,6 @@ #include #include #include -#include #include @@ -33,7 +32,6 @@ static struct rte_hash *hash_by_id; static struct event *ageing_timer; static const struct nexthop_af_ops *af_ops[256]; static const struct nexthop_type_ops *type_ops[256]; -static struct nh_stats nh_stats; struct gr_nexthop_config nh_conf = { .max_count = DEFAULT_MAX_COUNT, @@ -385,9 +383,6 @@ struct nexthop *nexthop_new(const struct gr_nexthop_base *base, const void *info return errno_set_null(-ret); } - nh_stats.total++; - nh_stats.by_type[nh->type]++; - if (nh->origin != GR_NH_ORIGIN_INTERNAL) gr_event_push(GR_EVENT_NEXTHOP_NEW, nh); @@ -406,12 +401,6 @@ int nexthop_update(struct nexthop *nh, const struct gr_nexthop_base *base, const nexthop_id_put(nh); - if (nh->ref_count > 0 && base->type != nh->type) { - assert(nh_stats.by_type[nh->type] > 0); - nh_stats.by_type[nh->type]--; - nh_stats.by_type[base->type]++; - } - // Copy base fields nh->base = *base; @@ -591,11 +580,6 @@ void nexthop_destroy(struct nexthop *nh) { if (nh->origin != GR_NH_ORIGIN_INTERNAL) gr_event_push(GR_EVENT_NEXTHOP_DELETE, nh); - 
assert(nh_stats.total > 0); - nh_stats.total--; - assert(nh_stats.by_type[nh->type] > 0); - nh_stats.by_type[nh->type]--; - const struct nexthop_type_ops *ops = type_ops[nh->type]; if (ops != NULL && ops->free != NULL) ops->free(nh); @@ -673,10 +657,6 @@ static void do_ageing(evutil_socket_t, short /*what*/, void * /*priv*/) { nexthop_iter(nexthop_ageing_cb, NULL); } -const struct nh_stats *nexthop_get_stats(void) { - return &nh_stats; -} - static void nh_init(struct event_base *ev_base) { pool = create_mempool(&nh_conf); if (pool == NULL) @@ -740,19 +720,6 @@ static struct gr_module module = { .fini = nh_fini, }; -static int -telemetry_nexthop_stats_get(const char * /*cmd*/, const char * /*params*/, struct rte_tel_data *d) { - rte_tel_data_start_dict(d); - - rte_tel_data_add_dict_uint(d, "total", nh_stats.total); - for (unsigned t = 0; t < ARRAY_DIM(nh_stats.by_type); t++) { - if (nh_stats.by_type[t] > 0) - rte_tel_data_add_dict_uint(d, gr_nh_type_name(t), nh_stats.by_type[t]); - } - - return 0; -} - static void l3_free(struct nexthop *nh) { struct nexthop_info_l3 *l3 = nexthop_info_l3(nh); @@ -1058,9 +1025,6 @@ static struct nexthop_type_ops group_nh_ops = { RTE_INIT(init) { gr_event_register_serializer(&nh_serializer); gr_register_module(&module); - rte_telemetry_register_cmd( - "/grout/nexthop/stats", telemetry_nexthop_stats_get, "Get nexthop statistics" - ); nexthop_type_ops_register(GR_NH_T_L3, &l3_nh_ops); nexthop_type_ops_register(GR_NH_T_GROUP, &group_nh_ops); } diff --git a/modules/infra/control/worker.c b/modules/infra/control/worker.c index daae8547d..04ad14e69 100644 --- a/modules/infra/control/worker.c +++ b/modules/infra/control/worker.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -423,24 +422,7 @@ int worker_queue_distribute(const cpu_set_t *affinity, gr_vec struct iface_info_ return ret; } -static int lcore_usage_cb(unsigned int lcore_id, struct rte_lcore_usage *usage) { - const struct worker_stats *stats; - struct 
worker *worker; - STAILQ_FOREACH (worker, &workers, next) { - if (worker->lcore_id == lcore_id) { - stats = atomic_load(&worker->stats); - if (stats == NULL) - return -EIO; - usage->busy_cycles = stats->busy_cycles; - usage->total_cycles = stats->total_cycles; - return 0; - } - } - return -ENODEV; -} - static void worker_init(struct event_base *) { - rte_lcore_register_usage_cb(lcore_usage_cb); if (worker_queue_distribute(&gr_config.datapath_cpus, NULL) < 0) ABORT("initial worker start failed"); } diff --git a/modules/ip/control/gr_ip4_control.h b/modules/ip/control/gr_ip4_control.h index bba02131e..6da0d60cc 100644 --- a/modules/ip/control/gr_ip4_control.h +++ b/modules/ip/control/gr_ip4_control.h @@ -50,11 +50,3 @@ void rib4_iter(uint16_t vrf_id, rib4_iter_cb_t cb, void *priv); struct nexthop *addr4_get_preferred(uint16_t iface_id, ip4_addr_t dst); // get all addresses for a given interface struct hoplist *addr4_get_all(uint16_t iface_id); - -struct rib4_stats { - uint32_t total_routes; - uint32_t by_origin[UINT_NUM_VALUES(gr_nh_origin_t)]; -}; - -// Get route stats for IPv4 (exposed for telemetry) -const struct rib4_stats *rib4_get_stats(uint16_t vrf_id); diff --git a/modules/ip/control/route.c b/modules/ip/control/route.c index feab77d0c..3a7dfcd99 100644 --- a/modules/ip/control/route.c +++ b/modules/ip/control/route.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -29,7 +28,6 @@ #include static struct rte_rib **vrf_ribs; -static struct rib4_stats stats[GR_MAX_VRFS]; static struct rte_rib_conf rib_conf = { .ext_sz = sizeof(gr_nh_origin_t), @@ -171,9 +169,6 @@ static int rib4_insert_or_replace( rte_rib_set_nh(rn, nh_ptr_to_id(nh)); o = rte_rib_get_ext(rn); - gr_nh_origin_t old_origin = origin; - if (existing) - old_origin = *o; *o = origin; fib4_insert(vrf_id, ip, prefixlen, nh); if (origin != GR_NH_ORIGIN_INTERNAL) { @@ -190,12 +185,7 @@ static int rib4_insert_or_replace( if (existing) { nexthop_decref(existing); - 
assert(stats[vrf_id].by_origin[old_origin] > 0); - stats[vrf_id].by_origin[old_origin]--; - } else { - stats[vrf_id].total_routes++; } - stats[vrf_id].by_origin[origin]++; return 0; fail: @@ -248,11 +238,6 @@ int rib4_delete(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen, gr_nh_type_t } ); } - // Update statistics - assert(stats[vrf_id].total_routes > 0); - stats[vrf_id].total_routes--; - assert(stats[vrf_id].by_origin[origin] > 0); - stats[vrf_id].by_origin[origin]--; nexthop_decref(nh); @@ -423,7 +408,6 @@ static struct api_out route4_list(const void *request, struct api_ctx *ctx) { } static void route4_init(struct event_base *) { - memset(stats, 0, sizeof(stats)); vrf_ribs = rte_calloc(__func__, GR_MAX_VRFS, sizeof(struct rte_rib *), RTE_CACHE_LINE_SIZE); if (vrf_ribs == NULL) ABORT("rte_calloc(vrf_ribs): %s", rte_strerror(rte_errno)); @@ -457,54 +441,6 @@ void rib4_cleanup(struct nexthop *nh) { rib4_iter(GR_VRF_ID_ALL, rib4_cleanup_nh, nh); } -const struct rib4_stats *rib4_get_stats(uint16_t vrf_id) { - if (vrf_id >= GR_MAX_VRFS) - return NULL; - return &stats[vrf_id]; -} - -static int -telemetry_rib4_stats_get(const char * /*cmd*/, const char * /*params*/, struct rte_tel_data *d) { - rte_tel_data_start_dict(d); - - for (uint16_t vrf_id = 0; vrf_id < GR_MAX_VRFS; vrf_id++) { - const struct rib4_stats *vrf_stats = rib4_get_stats(vrf_id); - - if (vrf_id != 0 && (vrf_stats == NULL || vrf_stats->total_routes == 0)) - continue; - - char vrf_key[32]; - snprintf(vrf_key, sizeof(vrf_key), "%u", vrf_id); - - struct rte_tel_data *vrf_data = rte_tel_data_alloc(); - if (vrf_data == NULL) - continue; - - rte_tel_data_start_dict(vrf_data); - rte_tel_data_add_dict_uint(vrf_data, "vrf_id", vrf_id); - - struct rte_tel_data *ipv4_data = rte_tel_data_alloc(); - if (ipv4_data != NULL) { - rte_tel_data_start_dict(ipv4_data); - rte_tel_data_add_dict_uint(ipv4_data, "total", vrf_stats->total_routes); - for (unsigned o = 0; o < ARRAY_DIM(vrf_stats->by_origin); o++) { - const char 
*name = gr_nh_origin_name(o); - uint32_t count = vrf_stats->by_origin[o]; - if (count > 0 && strcmp(name, "") != 0 && strcmp(name, "?") != 0) - rte_tel_data_add_dict_uint(ipv4_data, name, count); - } - rte_tel_data_add_dict_container(vrf_data, "ipv4", ipv4_data, 1); - } - - if (rte_tel_data_add_dict_container(d, vrf_key, vrf_data, 0) != 0) { - rte_tel_data_free(vrf_data); - continue; - } - } - - return 0; -} - static int serialize_route4_event(const void *obj, void **buf) { const struct route4_event *priv = obj; struct gr_ip4_route *r; @@ -574,7 +510,4 @@ RTE_INIT(control_ip_init) { gr_register_api_handler(&route4_list_handler); gr_event_register_serializer(&route_serializer); gr_register_module(&route4_module); - rte_telemetry_register_cmd( - "/grout/rib4/stats", telemetry_rib4_stats_get, "Get IPv4 RIB statistics" - ); } diff --git a/modules/ip6/control/gr_ip6_control.h b/modules/ip6/control/gr_ip6_control.h index a9bc21561..7ab4c54eb 100644 --- a/modules/ip6/control/gr_ip6_control.h +++ b/modules/ip6/control/gr_ip6_control.h @@ -67,11 +67,3 @@ struct nexthop *addr6_get_linklocal(uint16_t iface_id); struct hoplist *addr6_get_all(uint16_t iface_id); // determine if the given interface is member of the provided multicast address group struct nexthop *mcast6_get_member(uint16_t iface_id, const struct rte_ipv6_addr *mcast); - -struct rib6_stats { - uint32_t total_routes; - uint32_t by_origin[UINT_NUM_VALUES(gr_nh_origin_t)]; -}; - -// Get route stats for IPv6 -const struct rib6_stats *rib6_get_stats(uint16_t vrf_id); diff --git a/modules/ip6/control/route.c b/modules/ip6/control/route.c index 5c890dace..b1000c50f 100644 --- a/modules/ip6/control/route.c +++ b/modules/ip6/control/route.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -28,7 +27,6 @@ #include static struct rte_rib6 **vrf_ribs; -static struct rib6_stats stats[GR_MAX_VRFS]; static struct rte_rib6_conf rib6_conf = { .ext_sz = sizeof(gr_nh_origin_t), @@ -185,9 +183,6 @@ static 
int rib6_insert_or_replace( rte_rib6_set_nh(rn, nh_ptr_to_id(nh)); o = rte_rib6_get_ext(rn); - gr_nh_origin_t old_origin = origin; - if (existing) - old_origin = *o; *o = origin; fib6_insert(vrf_id, iface_id, scoped_ip, prefixlen, nh); if (origin != GR_NH_ORIGIN_INTERNAL) { @@ -202,14 +197,8 @@ static int rib6_insert_or_replace( ); } - if (existing) { + if (existing) nexthop_decref(existing); - assert(stats[vrf_id].by_origin[old_origin] > 0); - stats[vrf_id].by_origin[old_origin]--; - } else { - stats[vrf_id].total_routes++; - } - stats[vrf_id].by_origin[origin]++; return 0; fail: @@ -272,11 +261,6 @@ int rib6_delete( } ); } - // Update statistics - assert(stats[vrf_id].total_routes > 0); - stats[vrf_id].total_routes--; - assert(stats[vrf_id].by_origin[origin] > 0); - stats[vrf_id].by_origin[origin]--; nexthop_decref(nh); @@ -461,9 +445,6 @@ static struct api_out route6_list(const void *request, struct api_ctx *ctx) { } static void route6_init(struct event_base *) { - // Initialize statistics arrays to zero - memset(stats, 0, sizeof(stats)); - vrf_ribs = rte_calloc( __func__, GR_MAX_VRFS, sizeof(struct rte_rib6 *), RTE_CACHE_LINE_SIZE ); @@ -499,54 +480,6 @@ void rib6_cleanup(struct nexthop *nh) { rib6_iter(GR_VRF_ID_ALL, rib6_cleanup_nh, nh); } -const struct rib6_stats *rib6_get_stats(uint16_t vrf_id) { - if (vrf_id >= GR_MAX_VRFS) - return NULL; - return &stats[vrf_id]; -} - -static int -telemetry_rib6_stats_get(const char * /*cmd*/, const char * /*params*/, struct rte_tel_data *d) { - rte_tel_data_start_dict(d); - - for (uint16_t vrf_id = 0; vrf_id < GR_MAX_VRFS; vrf_id++) { - const struct rib6_stats *vrf_stats = rib6_get_stats(vrf_id); - - if (vrf_id != 0 && (vrf_stats == NULL || vrf_stats->total_routes == 0)) - continue; - - char vrf_key[32]; - snprintf(vrf_key, sizeof(vrf_key), "%u", vrf_id); - - struct rte_tel_data *vrf_data = rte_tel_data_alloc(); - if (vrf_data == NULL) - continue; - - rte_tel_data_start_dict(vrf_data); - 
rte_tel_data_add_dict_uint(vrf_data, "vrf_id", vrf_id); - - struct rte_tel_data *ipv6_data = rte_tel_data_alloc(); - if (ipv6_data != NULL) { - rte_tel_data_start_dict(ipv6_data); - rte_tel_data_add_dict_uint(ipv6_data, "total", vrf_stats->total_routes); - for (unsigned o = 0; o < ARRAY_DIM(vrf_stats->by_origin); o++) { - uint32_t count = vrf_stats->by_origin[o]; - const char *name = gr_nh_origin_name(o); - if (count > 0 && strcmp(name, "") != 0 && strcmp(name, "?") != 0) - rte_tel_data_add_dict_uint(ipv6_data, name, count); - } - rte_tel_data_add_dict_container(vrf_data, "ipv6", ipv6_data, 1); - } - - if (rte_tel_data_add_dict_container(d, vrf_key, vrf_data, 0) != 0) { - rte_tel_data_free(vrf_data); - continue; - } - } - - return 0; -} - static int serialize_route6_event(const void *obj, void **buf) { const struct route6_event *priv = obj; struct gr_ip6_route *r; @@ -616,7 +549,4 @@ RTE_INIT(control_ip_init) { gr_register_api_handler(&route6_list_handler); gr_event_register_serializer(&route6_serializer); gr_register_module(&route6_module); - rte_telemetry_register_cmd( - "/grout/rib6/stats", telemetry_rib6_stats_get, "Get IPv6 RIB statistics" - ); } diff --git a/rpm/grout.spec b/rpm/grout.spec index 69edf8842..afbc87829 100644 --- a/rpm/grout.spec +++ b/rpm/grout.spec @@ -60,14 +60,6 @@ Suggests: %{name} %description headers This package contains the development headers to build grout API clients. -%package prometheus -Summary: Prometheus exporter for DPDK/grout -BuildArch: noarch -Requires: python3 - -%description prometheus -Prometheus exporter for grout. 
- %package frr Summary: FRR dplane plugin for grout Requires: frr = %(sed -n "s/revision = frr-//p" subprojects/frr.wrap) @@ -82,9 +74,6 @@ FRR dplane plugin for grout %install %meson_install --skip-subprojects -install -D -m 0755 subprojects/dpdk/usertools/dpdk-telemetry-exporter.py %{buildroot}%{_bindir}/grout-telemetry-exporter -install -D -m 0644 -t %{buildroot}%{_datadir}/dpdk/telemetry-endpoints subprojects/dpdk/usertools/telemetry-endpoints/* - %post %systemd_post %{name}.service @@ -110,12 +99,6 @@ install -D -m 0644 -t %{buildroot}%{_datadir}/dpdk/telemetry-endpoints subprojec %{_datadir}/pkgconfig/grout.pc %{_includedir}/grout/gr_*.h -%files prometheus -%doc README.md -%license licenses/BSD-3-clause.txt -%attr(755, root, root) %{_bindir}/grout-telemetry-exporter -%attr(644, root, root) %{_datadir}/dpdk/telemetry-endpoints/* - %files frr %doc README.md %license licenses/GPL-2.0-or-later.txt From 926ba85c6b526caf3d8af95a829134ab3b68f21e Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 22:50:06 +0100 Subject: [PATCH 07/16] infra: move stats aggregation to worker module Move the graph_stats() function from the API layer to the worker module and rename it to worker_dump_stats(). This allows other components to aggregate per-worker node statistics without going through the API. The function will be used by the openmetrics exporter to collect graph node metrics. 
Signed-off-by: Robin Jarry --- modules/infra/api/stats.c | 78 +------------------------------ modules/infra/control/gr_worker.h | 1 + modules/infra/control/worker.c | 76 ++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 77 deletions(-) diff --git a/modules/infra/api/stats.c b/modules/infra/api/stats.c index d25ceef11..237eb0294 100644 --- a/modules/infra/api/stats.c +++ b/modules/infra/api/stats.c @@ -15,82 +15,6 @@ #include -static struct gr_infra_stat *find_stat(gr_vec struct gr_infra_stat *stats, const char *name) { - struct gr_infra_stat *s; - - gr_vec_foreach_ref (s, stats) { - if (strncmp(s->name, name, sizeof(s->name)) == 0) - return s; - } - - return errno_set_null(ENOENT); -} - -static gr_vec struct gr_infra_stat *graph_stats(uint16_t cpu_id) { - uint64_t loop_cycles = 0, node_cycles = 0, n_loops = 0, pkts = 0; - gr_vec struct gr_infra_stat *stats = NULL; - struct gr_infra_stat *s; - struct worker *worker; - - STAILQ_FOREACH (worker, &workers, next) { - const struct worker_stats *w_stats = atomic_load(&worker->stats); - if (w_stats == NULL) - continue; - if (cpu_id != UINT16_MAX && worker->cpu_id != cpu_id) - continue; - for (unsigned i = 0; i < w_stats->n_stats; i++) { - const struct node_stats *n = &w_stats->stats[i]; - const char *name = rte_node_id_to_name(n->node_id); - s = find_stat(stats, name); - if (s != NULL) { - s->packets += n->packets; - s->batches += n->batches; - s->cycles += n->cycles; - } else { - struct gr_infra_stat stat = { - .packets = n->packets, - .batches = n->batches, - .cycles = n->cycles, - .topo_order = n->topo_order, - }; - memccpy(stat.name, name, 0, sizeof(stat.name)); - gr_vec_add(stats, stat); - } - if (strncmp(name, "port_rx-", strlen("port_rx-")) == 0 - || strcmp(name, "control_input") == 0) - pkts += n->packets; - node_cycles += n->cycles; - } - s = find_stat(stats, "idle"); - if (s != NULL) { - s->batches += w_stats->n_sleeps; - s->cycles += w_stats->sleep_cycles; - } else { - struct gr_infra_stat stat = 
{ - .packets = 0, - .batches = w_stats->n_sleeps, - .cycles = w_stats->sleep_cycles, - .topo_order = UINT64_MAX, - }; - memccpy(stat.name, "idle", 0, sizeof(stat.name)); - gr_vec_add(stats, stat); - } - loop_cycles += w_stats->loop_cycles - w_stats->sleep_cycles; - n_loops += w_stats->n_loops; - } - - struct gr_infra_stat stat = { - .packets = pkts, - .batches = n_loops, - .cycles = loop_cycles - node_cycles, - .topo_order = UINT64_MAX - 1, - }; - memccpy(stat.name, "overhead", 0, sizeof(stat.name)); - gr_vec_add(stats, stat); - - return stats; -} - static bool skip_stat(const struct gr_infra_stat *s, gr_infra_stats_flags_t flags) { if (flags & GR_INFRA_STAT_F_ZERO) return false; @@ -115,7 +39,7 @@ static struct api_out stats_get(const void *request, struct api_ctx *) { int ret; if (req->flags & GR_INFRA_STAT_F_SW) { - stats = graph_stats(req->cpu_id); + stats = worker_dump_stats(req->cpu_id); if (stats == NULL && req->cpu_id != UINT16_MAX) return api_out(ENODEV, 0, NULL); } diff --git a/modules/infra/control/gr_worker.h b/modules/infra/control/gr_worker.h index 0bcc789f7..8e4e9f08d 100644 --- a/modules/infra/control/gr_worker.h +++ b/modules/infra/control/gr_worker.h @@ -78,3 +78,4 @@ int worker_rxq_assign(uint16_t port_id, uint16_t rxq_id, uint16_t cpu_id); int worker_queue_distribute(const cpu_set_t *affinity, gr_vec struct iface_info_port **ports); void worker_wait_ready(struct worker *); void worker_signal_ready(struct worker *); +gr_vec struct gr_infra_stat *worker_dump_stats(uint16_t cpu_id); diff --git a/modules/infra/control/worker.c b/modules/infra/control/worker.c index 04ad14e69..869fad4c4 100644 --- a/modules/infra/control/worker.c +++ b/modules/infra/control/worker.c @@ -422,6 +422,82 @@ int worker_queue_distribute(const cpu_set_t *affinity, gr_vec struct iface_info_ return ret; } +static struct gr_infra_stat *find_stat(gr_vec struct gr_infra_stat *stats, const char *name) { + struct gr_infra_stat *s; + + gr_vec_foreach_ref (s, stats) { + if 
(strncmp(s->name, name, sizeof(s->name)) == 0) + return s; + } + + return errno_set_null(ENOENT); +} + +gr_vec struct gr_infra_stat *worker_dump_stats(uint16_t cpu_id) { + uint64_t loop_cycles = 0, node_cycles = 0, n_loops = 0, pkts = 0; + gr_vec struct gr_infra_stat *stats = NULL; + struct gr_infra_stat *s; + struct worker *worker; + + STAILQ_FOREACH (worker, &workers, next) { + const struct worker_stats *w_stats = atomic_load(&worker->stats); + if (w_stats == NULL) + continue; + if (cpu_id != UINT16_MAX && worker->cpu_id != cpu_id) + continue; + for (unsigned i = 0; i < w_stats->n_stats; i++) { + const struct node_stats *n = &w_stats->stats[i]; + const char *name = rte_node_id_to_name(n->node_id); + s = find_stat(stats, name); + if (s != NULL) { + s->packets += n->packets; + s->batches += n->batches; + s->cycles += n->cycles; + } else { + struct gr_infra_stat stat = { + .packets = n->packets, + .batches = n->batches, + .cycles = n->cycles, + .topo_order = n->topo_order, + }; + memccpy(stat.name, name, 0, sizeof(stat.name)); + gr_vec_add(stats, stat); + } + if (strncmp(name, "port_rx-", strlen("port_rx-")) == 0 + || strcmp(name, "control_input") == 0) + pkts += n->packets; + node_cycles += n->cycles; + } + s = find_stat(stats, "idle"); + if (s != NULL) { + s->batches += w_stats->n_sleeps; + s->cycles += w_stats->sleep_cycles; + } else { + struct gr_infra_stat stat = { + .packets = 0, + .batches = w_stats->n_sleeps, + .cycles = w_stats->sleep_cycles, + .topo_order = UINT64_MAX, + }; + memccpy(stat.name, "idle", 0, sizeof(stat.name)); + gr_vec_add(stats, stat); + } + loop_cycles += w_stats->loop_cycles - w_stats->sleep_cycles; + n_loops += w_stats->n_loops; + } + + struct gr_infra_stat stat = { + .packets = pkts, + .batches = n_loops, + .cycles = loop_cycles - node_cycles, + .topo_order = UINT64_MAX - 1, + }; + memccpy(stat.name, "overhead", 0, sizeof(stat.name)); + gr_vec_add(stats, stat); + + return stats; +} + static void worker_init(struct event_base *) { if 
(worker_queue_distribute(&gr_config.datapath_cpus, NULL) < 0) ABORT("initial worker start failed"); From 6730dbbf5ca43b2e98383175bc0e38e9aca5cae4 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Thu, 15 Jan 2026 13:10:05 +0100 Subject: [PATCH 08/16] main: allow arbitrary extra EAL arguments Instead of white listing which EAL arguments we support, do not pollute grout command line flags with them and allow users to specify any EAL argument after grout options and a "--" marker. Clearly mention that this feature is for debugging/development purposes. Signed-off-by: Robin Jarry --- docs/grout.8.scdoc | 73 +++++----------------------------------------- main/main.c | 45 +++++----------------------- 2 files changed, 14 insertions(+), 104 deletions(-) diff --git a/docs/grout.8.scdoc b/docs/grout.8.scdoc index 702da6d1e..26a5e18e9 100644 --- a/docs/grout.8.scdoc +++ b/docs/grout.8.scdoc @@ -11,12 +11,7 @@ Grout is a software router based on DPDK _rte_graph_. # SYNOPSIS *grout* -\[*-B* _SIZE_] -\[*-D* _PATH_] -\[*-L* _TYPE_:_LEVEL_] -\[*-M* _MODE_] \[*-S*] -\[*-T* _REGEXP_] \[*-V*] \[*-h*] \[*-m* _PERMISSIONS_] @@ -27,54 +22,13 @@ Grout is a software router based on DPDK _rte_graph_. \[*-u* _MTU_] \[*-v*] \[*-x*] +\[*--* _EAL ARGS_...] # OPTIONS -*-B*, *--trace-bufsz* _SIZE_ - Specify maximum size of allocated memory for trace output for each - thread. Valid unit can be either B or K or M for Bytes, KBytes and - MBytes respectively. For example: - - ``` - --trace-bufsz 2M - ``` - - By default, size of trace output file is 1MB and parameter must be - specified once only. - -*-D*, *--trace-dir* _PATH_ - Specify trace directory for trace output. For example: - - ``` - --trace-dir /tmp - ``` - - By default, trace output will created at home directory and parameter - must be specified once only. - *-h*, *--help* Display usage help. -*-L*, *--log-level* _TYPE_:_LEVEL_ - Specify log level for a specific component. 
For example: - - ``` - --log-level lib.eal:debug - ``` - - Can be specified multiple times. - -*-M*, *--trace-mode* _o_|_overwrite_|_d_|_discard_ - Specify the mode of update of trace output file. Either update on a file - can be wrapped or discarded when file size reaches its maximum limit. - For example: - - ``` - --trace-mode discard - ``` - - Default mode is _overwrite_ and parameter must be specified once only. - *-m*, *--socket-mode* _PERMISSIONS_ Change the API socket file permissions after creating it. Only octal values are supported. @@ -101,25 +55,6 @@ Grout is a software router based on DPDK _rte_graph_. *-t*, *--test-mode* Run in test mode (no huge pages). -*-T*, *--trace* _REGEXP_ - Enable trace based on regular expression trace name. By default, the - trace is disabled. User must specify this option to enable trace. For - example: - - Global trace configuration for EAL only: - - ``` - --trace eal - ``` - - Global trace configuration for ALL the components: - - ``` - --trace ".*" - ``` - - Can be specified multiple times up to 32 times. - *-u*, *--max-mtu* _MTU_ Maximum Transmission Unit. @@ -134,6 +69,12 @@ Grout is a software router based on DPDK _rte_graph_. *-x*, *--trace-packets* Print all ingress/egress packets (for debugging purposes). +# EXTRA EAL ARGUMENTS + +Any DPDK EAL argument can be specified after an optional *--*. This is an +advanced debugging/development feature that can break things. Use with great +care. 
+ # SEE ALSO *grcli*(1) diff --git a/main/main.c b/main/main.c index a1922e57a..b8285970b 100644 --- a/main/main.c +++ b/main/main.c @@ -33,34 +33,25 @@ static void usage(void) { printf("Usage: grout"); - printf(" [-B SIZE]"); - printf(" [-D PATH]"); - printf(" [-L TYPE:LEVEL]"); - printf(" [-M MODE]"); printf(" [-S]"); - printf(" [-T REGEXP]"); printf(" [-V]"); printf(" [-h]"); - printf("\n "); printf(" [-m PERMISSIONS]"); printf(" [-o USER:GROUP]"); + printf("\n "); printf(" [-p]"); printf(" [-s PATH]"); printf(" [-t]"); printf(" [-u MTU]"); printf(" [-v]"); printf(" [-x]"); + printf(" [-- EAL ARGS...]"); puts(""); puts(""); printf(" Graph router version %s (%s).\n", GROUT_VERSION, rte_version()); puts(""); puts("options:"); - puts(" -B, --trace-bufsz SIZE Maximum size of allocated memory for trace output."); - puts(" -D, --trace-dir PATH Change path for trace output."); - puts(" -L, --log-level TYPE:LEVEL Specify log level for a specific component."); - puts(" -M, --trace-mode MODE Specify the mode of update of trace output file."); puts(" -S, --syslog Redirect logs to syslog."); - puts(" -T, --trace REGEXO Enable trace matching the regular expression."); puts(" -V, --version Print version and exit."); puts(" -h, --help Display this help message and exit."); puts(" -m, --socket-mode PERMISSIONS API socket file permissions (Default: 0660)."); @@ -73,6 +64,7 @@ static void usage(void) { puts(" -u, --max-mtu MTU Maximum Transmission Unit (default 1800)."); puts(" -v, --verbose Increase verbosity."); puts(" -x, --trace-packets Print all ingress/egress packets."); + puts(" EAL ARGS... Extra DPDK EAL arguments. Use with care."); } static int perr(const char *fmt, ...) 
{ @@ -144,10 +136,9 @@ static int parse_sock_owner(char *user_group_str) { static int parse_args(int argc, char **argv) { int c; -#define FLAGS ":B:D:L:M:T:Vhm:o:pSs:tu:vx" +#define FLAGS ":Vhm:o:pSs:tu:vx" static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, - {"log-level", required_argument, NULL, 'L'}, {"max-mtu", required_argument, NULL, 'u'}, {"poll-mode", no_argument, NULL, 'p'}, {"socket", required_argument, NULL, 's'}, @@ -155,10 +146,6 @@ static int parse_args(int argc, char **argv) { {"socket-owner", required_argument, NULL, 'o'}, {"syslog", no_argument, NULL, 'S'}, {"test-mode", no_argument, NULL, 't'}, - {"trace", required_argument, NULL, 'T'}, - {"trace-bufsz", required_argument, NULL, 'B'}, - {"trace-dir", required_argument, NULL, 'D'}, - {"trace-mode", required_argument, NULL, 'M'}, {"trace-packets", no_argument, NULL, 'x'}, {"verbose", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, @@ -183,10 +170,6 @@ static int parse_args(int argc, char **argv) { usage(); exit(EXIT_SUCCESS); break; - case 'L': - gr_vec_add(gr_config.eal_extra_args, "--log-level"); - gr_vec_add(gr_config.eal_extra_args, optarg); - break; case 'm': if (parse_uint(&gr_config.api_sock_mode, optarg, 8, 0, 07777) < 0) return perr("--socket-mode: %s", strerror(errno)); @@ -207,18 +190,6 @@ static int parse_args(int argc, char **argv) { case 't': gr_config.test_mode = true; break; - case 'T': - gr_vec_add(gr_config.eal_extra_args, "--trace"); - gr_vec_add(gr_config.eal_extra_args, optarg); - break; - case 'D': - gr_vec_add(gr_config.eal_extra_args, "--trace-dir"); - gr_vec_add(gr_config.eal_extra_args, optarg); - break; - case 'M': - gr_vec_add(gr_config.eal_extra_args, "--trace-mode"); - gr_vec_add(gr_config.eal_extra_args, optarg); - break; case 'x': gr_config.log_packets = true; break; @@ -237,13 +208,11 @@ static int parse_args(int argc, char **argv) { return perr("-%c requires a value", optopt); case '?': return perr("-%c unknown option", optopt); 
- default: - goto end; } } -end: - if (optind < argc) - return perr("invalid arguments"); + + for (c = optind; c < argc; c++) + gr_vec_add(gr_config.eal_extra_args, argv[c]); return 0; } From c8f563fcda957451643e77c41b7fc6a2cc86e629 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:15:16 +0100 Subject: [PATCH 09/16] main: add builtin openmetrics exporter Implement an openmetrics exporter using libevent HTTP server. The exporter listens on 127.0.0.1:9111 by default and can be configured via the -M/--metrics option or disabled with -M :0. The HTTP server runs in a dedicated thread with its own event loop, keeping it isolated from the main control plane. The thread inherits control_cpus affinity and can be updated at runtime via the affinity API. The implementation provides a collector registration mechanism where modules can register callbacks to emit their metrics. The API uses a context structure to manage labels and emit metric values with proper HELP and TYPE annotations in the openmetrics exposition format. Startup failures (bind errors, resource allocation) are logged but do not prevent grout from starting. Signed-off-by: Robin Jarry --- docs/grout.8.scdoc | 7 ++ main/gr_config.h | 2 + main/gr_metrics.h | 65 +++++++++++ main/main.c | 37 +++++- main/meson.build | 1 + main/metrics.c | 219 +++++++++++++++++++++++++++++++++++ meson.build | 3 +- modules/infra/api/affinity.c | 5 + 8 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 main/gr_metrics.h create mode 100644 main/metrics.c diff --git a/docs/grout.8.scdoc b/docs/grout.8.scdoc index 26a5e18e9..c8955d49b 100644 --- a/docs/grout.8.scdoc +++ b/docs/grout.8.scdoc @@ -11,6 +11,7 @@ Grout is a software router based on DPDK _rte_graph_. # SYNOPSIS *grout* +\[*-M* _ADDR_:_PORT_] \[*-S*] \[*-V*] \[*-h*] @@ -29,6 +30,12 @@ Grout is a software router based on DPDK _rte_graph_. *-h*, *--help* Display usage help. 
+*-M*, *--metrics* _ADDR_:_PORT_ + Change the listen address and port where openmetrics will be exported + via HTTP GET in a dedicated thread. To disable, use *-M* _:0_. + + Default: _127.0.0.1:9111_ + *-m*, *--socket-mode* _PERMISSIONS_ Change the API socket file permissions after creating it. Only octal values are supported. diff --git a/main/gr_config.h b/main/gr_config.h index 50429316f..30b0a390a 100644 --- a/main/gr_config.h +++ b/main/gr_config.h @@ -24,6 +24,8 @@ struct gr_config { gr_vec char **eal_extra_args; cpu_set_t control_cpus; // control plane threads allowed CPUs cpu_set_t datapath_cpus; // datapath threads allowed CPUs + const char *metrics_addr; // openmetrics listen address (NULL to disable) + uint16_t metrics_port; // openmetrics listen port (0 to disable) }; extern struct gr_config gr_config; diff --git a/main/gr_metrics.h b/main/gr_metrics.h new file mode 100644 index 000000000..de2724847 --- /dev/null +++ b/main/gr_metrics.h @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#pragma once + +#include +#include +#include +#include +#include + +enum gr_metric_type { + GR_METRIC_COUNTER, + GR_METRIC_GAUGE, +}; + +// Metric definition (static, registered once per metric name) +struct gr_metric { + const char *name; // will be prefixed with "grout_" + const char *help; + enum gr_metric_type type; +}; + +// Convenience macros for static metric definitions. 
+#define METRIC_COUNTER(v, n, h) \ + static const struct gr_metric v = {.name = (n), .help = (h), .type = GR_METRIC_COUNTER} +#define METRIC_GAUGE(v, n, h) \ + static const struct gr_metric v = {.name = (n), .help = (h), .type = GR_METRIC_GAUGE} + +// Opaque writer context +struct gr_metrics_writer; + +// Label context - holds base labels for multiple emit calls +struct gr_metrics_ctx { + struct gr_metrics_writer *w; + char labels[512]; + size_t labels_len; +}; + +// Initialize context with base labels (varargs: key, val, ..., NULL) +void gr_metrics_ctx_init(struct gr_metrics_ctx *, struct gr_metrics_writer *, ...); + +// Add more labels to existing context (varargs: key, val, ..., NULL) +void gr_metrics_labels_add(struct gr_metrics_ctx *, ...); + +// Emit metric value using context's labels +void gr_metric_emit(struct gr_metrics_ctx *, const struct gr_metric *, uint64_t value); + +// Collector registration (groups related metrics + callback) +struct gr_metrics_collector { + const char *name; + void (*collect)(struct gr_metrics_writer *); + STAILQ_ENTRY(gr_metrics_collector) next; +}; + +void gr_metrics_register(struct gr_metrics_collector *); + +// Start the openmetrics HTTP server in a dedicated thread +void gr_metrics_start(void); + +// Stop the openmetrics HTTP server +void gr_metrics_stop(void); + +// Change the thread affinity of the openmetrics thread +int gr_metrics_set_affinity(size_t set_size, const cpu_set_t *affinity); diff --git a/main/main.c b/main/main.c index b8285970b..28756f711 100644 --- a/main/main.c +++ b/main/main.c @@ -3,6 +3,7 @@ #include "api.h" #include "dpdk.h" +#include "gr_metrics.h" #include "module.h" #include "sd_notify.h" #include "signals.h" @@ -33,6 +34,7 @@ static void usage(void) { printf("Usage: grout"); + printf(" [-M ADDR:PORT]"); printf(" [-S]"); printf(" [-V]"); printf(" [-h]"); @@ -51,6 +53,8 @@ static void usage(void) { printf(" Graph router version %s (%s).\n", GROUT_VERSION, rte_version()); puts(""); puts("options:"); + 
puts(" -M, --metrics ADDR:PORT Serve openmetrics on http://ADDR:PORT"); + puts(" (default http://127.0.0.1:9111)."); puts(" -S, --syslog Redirect logs to syslog."); puts(" -V, --version Print version and exit."); puts(" -h, --help Display this help message and exit."); @@ -101,6 +105,27 @@ parse_uint(unsigned int *v, const char *s, uint8_t base, unsigned long min, unsi struct gr_config gr_config; +static int parse_metrics_addr(char *addr_port_str) { + char *port_str; + char *colon; + unsigned port; + + colon = strrchr(addr_port_str, ':'); + if (!colon) + return perr("--metrics: missing ':' (expected ADDR:PORT)"); + + *colon = '\0'; + port_str = colon + 1; + + if (parse_uint(&port, port_str, 10, 0, 65535) < 0) + return perr("--metrics: invalid port: %s", strerror(errno)); + + gr_config.metrics_addr = addr_port_str; + gr_config.metrics_port = port; + + return 0; +} + static int parse_sock_owner(char *user_group_str) { char *group_str, *user_str = user_group_str; struct passwd *pw; @@ -136,10 +161,11 @@ static int parse_sock_owner(char *user_group_str) { static int parse_args(int argc, char **argv) { int c; -#define FLAGS ":Vhm:o:pSs:tu:vx" +#define FLAGS ":M:Vhm:o:pSs:tu:vx" static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"max-mtu", required_argument, NULL, 'u'}, + {"metrics", required_argument, NULL, 'M'}, {"poll-mode", no_argument, NULL, 'p'}, {"socket", required_argument, NULL, 's'}, {"socket-mode", required_argument, NULL, 'm'}, @@ -163,6 +189,8 @@ static int parse_args(int argc, char **argv) { gr_config.max_mtu = 1800; gr_config.log_level = RTE_LOG_NOTICE; gr_config.eal_extra_args = NULL; + gr_config.metrics_addr = "127.0.0.1"; + gr_config.metrics_port = 9111; while ((c = getopt_long(argc, argv, FLAGS, long_options, NULL)) != -1) { switch (c) { @@ -181,6 +209,10 @@ static int parse_args(int argc, char **argv) { case 'p': gr_config.poll_mode = true; break; + case 'M': + if (parse_metrics_addr(optarg) < 0) + return errno_set(EINVAL); + 
break; case 'S': gr_config.log_syslog = true; break; @@ -271,6 +303,8 @@ int main(int argc, char **argv) { goto shutdown; } + gr_metrics_start(); + if (register_signals(ev_base) < 0) { err = errno; goto shutdown; @@ -290,6 +324,7 @@ int main(int argc, char **argv) { shutdown: unregister_signals(); + gr_metrics_stop(); if (ev_base) { api_socket_stop(ev_base); modules_fini(ev_base); diff --git a/main/meson.build b/main/meson.build index 5f5eeff53..db0b458d5 100644 --- a/main/meson.build +++ b/main/meson.build @@ -6,6 +6,7 @@ src += files( 'dpdk.c', 'event.c', 'main.c', + 'metrics.c', 'module.c', 'sd_notify.c', 'signals.c', diff --git a/main/metrics.c b/main/metrics.c new file mode 100644 index 000000000..86c8755f6 --- /dev/null +++ b/main/metrics.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include "gr_metrics.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +static STAILQ_HEAD(, gr_metrics_collector) collectors = STAILQ_HEAD_INITIALIZER(collectors); + +void gr_metrics_register(struct gr_metrics_collector *c) { + STAILQ_INSERT_TAIL(&collectors, c, next); +} + +struct gr_metrics_writer { + struct evbuffer *buf; + // Pointers to static metrics that have had HELP/TYPE written. 
+ gr_vec const struct gr_metric **emitted; +}; + +static bool metric_emitted(const struct gr_metrics_writer *w, const struct gr_metric *m) { + gr_vec_foreach (const struct gr_metric *e, w->emitted) { + if (e == m) + return true; + } + return false; +} + +static void emit_help_type(struct gr_metrics_writer *w, const struct gr_metric *m) { + const char *type_str; + + if (metric_emitted(w, m)) + return; + + switch (m->type) { + case GR_METRIC_COUNTER: + type_str = "counter"; + break; + case GR_METRIC_GAUGE: + type_str = "gauge"; + break; + default: + ABORT("unsupported metric type %u", m->type); + } + + evbuffer_add_printf(w->buf, "# HELP grout_%s %s\n", m->name, m->help); + evbuffer_add_printf(w->buf, "# TYPE grout_%s %s\n", m->name, type_str); + gr_vec_add(w->emitted, m); +} + +static void append_labels_va(struct gr_metrics_ctx *ctx, va_list ap) { + const size_t len = sizeof(ctx->labels) - ctx->labels_len; + char *buf = ctx->labels + ctx->labels_len; + const char *key, *val; + size_t n = 0; + + for (key = va_arg(ap, const char *); key != NULL; key = va_arg(ap, const char *)) { + val = va_arg(ap, const char *); + + if (n > 0 || ctx->labels_len > 0) + SAFE_BUF(snprintf, len, ","); + + SAFE_BUF(snprintf, len, "%s=\"%s\"", key, val ?: ""); + } + + ctx->labels_len += n; + + return; +err: + LOG(ERR, "snprintf: %s", strerror(errno)); +} + +void gr_metrics_ctx_init(struct gr_metrics_ctx *ctx, struct gr_metrics_writer *w, ...) { + va_list ap; + + ctx->w = w; + ctx->labels_len = 0; + ctx->labels[0] = '\0'; + + va_start(ap, w); + append_labels_va(ctx, ap); + va_end(ap); +} + +void gr_metrics_labels_add(struct gr_metrics_ctx *ctx, ...) 
{ + va_list ap; + + va_start(ap, ctx); + append_labels_va(ctx, ap); + va_end(ap); +} + +void gr_metric_emit(struct gr_metrics_ctx *ctx, const struct gr_metric *m, uint64_t value) { + emit_help_type(ctx->w, m); + evbuffer_add_printf(ctx->w->buf, "grout_%s{%s} %lu\n", m->name, ctx->labels, value); +} + +static void metrics_handler(struct evhttp_request *req, void *) { + if (gr_config.log_level >= RTE_LOG_DEBUG) { + struct evhttp_connection *conn = evhttp_request_get_connection(req); + char *peer_addr = NULL; + uint16_t peer_port = 0; + if (conn != NULL) + evhttp_connection_get_peer(conn, &peer_addr, &peer_port); + LOG(DEBUG, "GET %s - %s:%u", evhttp_request_get_uri(req), peer_addr, peer_port); + } + + struct gr_metrics_writer writer = { + .buf = evbuffer_new(), + .emitted = NULL, + }; + if (writer.buf == NULL) { + LOG(ERR, "evbuffer_new: %s", strerror(errno)); + evhttp_send_error(req, HTTP_INTERNAL, "Internal error"); + return; + } + + struct gr_metrics_collector *col; + STAILQ_FOREACH (col, &collectors, next) + col->collect(&writer); + + evhttp_send_reply(req, HTTP_OK, NULL, writer.buf); + + gr_vec_free(writer.emitted); + evbuffer_free(writer.buf); +} + +static struct event_base *ev_base; +static pthread_t thread_id; + +int gr_metrics_set_affinity(size_t set_size, const cpu_set_t *affinity) { + if (thread_id == 0) + return 0; + return pthread_setaffinity_np(thread_id, set_size, affinity); +} + +static void *metrics_thread(void *) { + const char *addr = gr_config.metrics_addr; + const uint16_t port = gr_config.metrics_port; + struct evhttp *http = NULL; + + pthread_setname_np(pthread_self(), "grout:metrics"); + pthread_setaffinity_np( + pthread_self(), sizeof(gr_config.control_cpus), &gr_config.control_cpus + ); + + ev_base = event_base_new(); + if (ev_base == NULL) { + errno = errno ?: ENOMEM; + LOG(ERR, "event_base_new: %s", strerror(errno)); + return NULL; + } + + http = evhttp_new(ev_base); + if (http == NULL) { + errno = errno ?: ENOMEM; + LOG(ERR, "evhttp_new: 
%s", strerror(errno)); + goto end; + } + + evhttp_set_max_headers_size(http, 4096); + evhttp_set_max_body_size(http, 0); + evhttp_set_allowed_methods(http, EVHTTP_REQ_GET); + evhttp_set_gencb(http, metrics_handler, NULL); + evhttp_set_default_content_type(http, "text/plain; version=0.0.4; charset=utf-8"); + + errno = 0; + if (evhttp_bind_socket(http, addr, port) < 0) { + errno = errno ?: EADDRNOTAVAIL; + LOG(ERR, "bind http://%s:%u/: %s", addr, port, strerror(errno)); + goto end; + } + + LOG(NOTICE, "openmetrics exporter listening on http://%s:%u/", addr, port); + + if (event_base_dispatch(ev_base) < 0) { + errno = errno ?: EIO; + LOG(ERR, "event_base_dispatch: %s", strerror(errno)); + } + +end: + if (http != NULL) + evhttp_free(http); + event_base_free(ev_base); + ev_base = NULL; + + return NULL; +} + +void gr_metrics_start(void) { + if (gr_config.metrics_addr != NULL && gr_config.metrics_port != 0) { + if (pthread_create(&thread_id, NULL, metrics_thread, NULL) != 0) { + LOG(ERR, "pthread_create: %s", strerror(errno)); + thread_id = 0; + } + } else { + LOG(INFO, "openmetrics exporter disabled"); + } +} + +void gr_metrics_stop(void) { + if (ev_base != NULL) + event_base_loopbreak(ev_base); + if (thread_id != 0) { + pthread_join(thread_id, NULL); + thread_id = 0; + } +} diff --git a/meson.build b/meson.build index b7e4970c9..b2075b6b3 100644 --- a/meson.build +++ b/meson.build @@ -74,6 +74,7 @@ dpdk_dep = dependency( ) ev_core_dep = dependency('libevent_core') +ev_extra_dep = dependency('libevent_extra') ev_thread_dep = dependency('libevent_pthreads') mnl_dep = dependency('libmnl') numa_dep = dependency('numa') @@ -112,7 +113,7 @@ subdir('frr') grout_exe = executable( 'grout', src, include_directories: inc + api_inc, - dependencies: [dpdk_dep, ev_core_dep, ev_thread_dep, mnl_dep, numa_dep], + dependencies: [dpdk_dep, ev_core_dep, ev_extra_dep, ev_thread_dep, mnl_dep, numa_dep], c_args: ['-D__GROUT_MAIN__'], install: true, ) diff --git a/modules/infra/api/affinity.c 
b/modules/infra/api/affinity.c index ee9ab6533..97dd45c06 100644 --- a/modules/infra/api/affinity.c +++ b/modules/infra/api/affinity.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,10 @@ static struct api_out affinity_set(const void *request, struct api_ctx *) { if (ret < 0) goto out; + ret = -gr_metrics_set_affinity(CPU_SETSIZE, &req->control_cpus); + if (ret < 0) + goto out; + gr_config.control_cpus = req->control_cpus; } if (CPU_COUNT(&req->datapath_cpus) > 0) { From 3217f1e693a469435350983778442f26a75a2dea Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:15:57 +0100 Subject: [PATCH 10/16] metrics: export interface metrics Register a metrics collector for interfaces including administrative and operational state, MTU, promiscuous mode, and packet/byte counters for both datapath and control plane. The interface statistics are aggregated across all CPU cores. Extend the iface_type structure with an optional metrics_collect callback to allow interface types to export type-specific metrics. Implement this callback for port interfaces to expose queue configuration and hardware statistics such as missed packets and TX errors. 
Signed-off-by: Robin Jarry --- modules/infra/api/iface.c | 78 +++++++++++++++++++++++++++++ modules/infra/control/gr_iface.h | 2 + modules/infra/control/port.c | 30 +++++++++++ modules/infra/control/port_test.c | 3 ++ modules/infra/control/worker_test.c | 3 ++ 5 files changed, 116 insertions(+) diff --git a/modules/infra/api/iface.c b/modules/infra/api/iface.c index 834069f6e..108921abb 100644 --- a/modules/infra/api/iface.c +++ b/modules/infra/api/iface.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,82 @@ static struct gr_event_serializer iface_serializer = { }, }; +METRIC_GAUGE(m_up, "iface_up", "Interface administrative state."); +METRIC_GAUGE(m_running, "iface_running", "Interface operational state."); +METRIC_GAUGE(m_mtu, "iface_mtu", "Interface maximum transmission unit."); +METRIC_GAUGE(m_promisc, "iface_promisc", "Interface promiscuous mode."); +METRIC_COUNTER(m_rx_packets, "iface_rx_packets", "Number of received packets."); +METRIC_COUNTER(m_rx_bytes, "iface_rx_bytes", "Number of received bytes."); +METRIC_COUNTER(m_tx_packets, "iface_tx_packets", "Number of transmitted packets."); +METRIC_COUNTER(m_tx_bytes, "iface_tx_bytes", "Number of transmitted bytes."); +METRIC_COUNTER( + m_cp_rx_packets, + "iface_cp_rx_packets", + "Number of packets received by control plane." +); +METRIC_COUNTER(m_cp_rx_bytes, "iface_cp_rx_bytes", "Number of bytes received by control plane."); +METRIC_COUNTER( + m_cp_tx_packets, + "iface_cp_tx_packets", + "Number of packets transmitted by control plane." 
+); +METRIC_COUNTER(m_cp_tx_bytes, "iface_cp_tx_bytes", "Number of bytes transmitted by control plane."); + +static void iface_metrics_collect(struct gr_metrics_writer *w) { + struct iface *iface = NULL; + struct gr_metrics_ctx ctx; + char vrf[16]; + + while ((iface = iface_next(GR_IFACE_TYPE_UNDEF, iface)) != NULL) { + const struct iface_type *type = iface_type_get(iface->type); + + snprintf(vrf, sizeof(vrf), "%u", iface->vrf_id); + + gr_metrics_ctx_init( + &ctx, w, "name", iface->name, "type", type->name, "vrf", vrf, NULL + ); + + gr_metric_emit(&ctx, &m_up, !!(iface->flags & GR_IFACE_F_UP)); + gr_metric_emit(&ctx, &m_running, !!(iface->state & GR_IFACE_S_RUNNING)); + gr_metric_emit(&ctx, &m_mtu, iface->mtu); + gr_metric_emit(&ctx, &m_promisc, !!(iface->flags & GR_IFACE_F_PROMISC)); + + // Aggregate per-core stats + uint64_t rx_pkts = 0, rx_bytes = 0, tx_pkts = 0, tx_bytes = 0; + uint64_t cp_rx_pkts = 0, cp_rx_bytes = 0, cp_tx_pkts = 0, cp_tx_bytes = 0; + + for (int i = 0; i < RTE_MAX_LCORE; i++) { + struct iface_stats *s = iface_get_stats(i, iface->id); + rx_pkts += s->rx_packets; + rx_bytes += s->rx_bytes; + tx_pkts += s->tx_packets; + tx_bytes += s->tx_bytes; + cp_rx_pkts += s->cp_rx_packets; + cp_rx_bytes += s->cp_rx_bytes; + cp_tx_pkts += s->cp_tx_packets; + cp_tx_bytes += s->cp_tx_bytes; + } + + gr_metric_emit(&ctx, &m_rx_packets, rx_pkts); + gr_metric_emit(&ctx, &m_rx_bytes, rx_bytes); + gr_metric_emit(&ctx, &m_tx_packets, tx_pkts); + gr_metric_emit(&ctx, &m_tx_bytes, tx_bytes); + gr_metric_emit(&ctx, &m_cp_rx_packets, cp_rx_pkts); + gr_metric_emit(&ctx, &m_cp_rx_bytes, cp_rx_bytes); + gr_metric_emit(&ctx, &m_cp_tx_packets, cp_tx_pkts); + gr_metric_emit(&ctx, &m_cp_tx_bytes, cp_tx_bytes); + + // Dispatch to type-specific collector + if (type->metrics_collect != NULL) + type->metrics_collect(&ctx, iface); + } +} + +static struct gr_metrics_collector iface_collector = { + .name = "iface", + .collect = iface_metrics_collect, +}; + RTE_INIT(infra_api_init) { 
gr_register_api_handler(&iface_add_handler); gr_register_api_handler(&iface_del_handler); @@ -182,4 +259,5 @@ RTE_INIT(infra_api_init) { gr_register_api_handler(&iface_list_handler); gr_register_api_handler(&iface_set_handler); gr_event_register_serializer(&iface_serializer); + gr_metrics_register(&iface_collector); } diff --git a/modules/infra/control/gr_iface.h b/modules/infra/control/gr_iface.h index ca3b11303..20680e98a 100644 --- a/modules/infra/control/gr_iface.h +++ b/modules/infra/control/gr_iface.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -53,6 +54,7 @@ struct iface_type { int (*set_mtu)(struct iface *, uint16_t mtu); int (*set_promisc)(struct iface *, bool enabled); void (*to_api)(void *api_info, const struct iface *); + void (*metrics_collect)(struct gr_metrics_ctx *, const struct iface *); const char *name; STAILQ_ENTRY(iface_type) next; }; diff --git a/modules/infra/control/port.c b/modules/infra/control/port.c index 5823c5505..abcd5d0ab 100644 --- a/modules/infra/control/port.c +++ b/modules/infra/control/port.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -597,6 +598,34 @@ static void port_to_api(void *info, const struct iface *iface) { } } +METRIC_GAUGE(m_rxqs, "iface_port_rxqs", "Number of RX queues."); +METRIC_GAUGE(m_txqs, "iface_port_txqs", "Number of TX queues."); +METRIC_GAUGE(m_rxq_size, "iface_port_rxq_size", "Number of descriptors in RX queues."); +METRIC_GAUGE(m_txq_size, "iface_port_txq_size", "Number of descriptors in TX queues."); +METRIC_COUNTER(m_rx_missed, "iface_port_rx_missed", "Number of packets dropped by HW."); +METRIC_COUNTER(m_tx_errors, "iface_port_tx_errors", "Number of TX failures."); + +static void port_metrics_collect(struct gr_metrics_ctx *ctx, const struct iface *iface) { + const struct iface_info_port *port = iface_info_port(iface); + struct rte_eth_dev_info dev_info; + struct rte_eth_stats stats; + + if (rte_eth_dev_info_get(port->port_id, &dev_info) == 
0) + gr_metrics_labels_add(ctx, "driver", dev_info.driver_name, NULL); + else + gr_metrics_labels_add(ctx, "driver", "?", NULL); + + gr_metric_emit(ctx, &m_rxqs, port->n_rxq); + gr_metric_emit(ctx, &m_txqs, port->n_txq); + gr_metric_emit(ctx, &m_rxq_size, port->rxq_size); + gr_metric_emit(ctx, &m_txq_size, port->txq_size); + + if (rte_eth_stats_get(port->port_id, &stats) == 0) { + gr_metric_emit(ctx, &m_rx_missed, stats.imissed); + gr_metric_emit(ctx, &m_tx_errors, stats.oerrors); + } +} + static struct event *link_event; static void link_event_cb(evutil_socket_t, short /*what*/, void * /*priv*/) { @@ -764,6 +793,7 @@ static struct iface_type iface_type_port = { .set_up_down = port_up_down, .set_promisc = port_promisc_set, .to_api = port_to_api, + .metrics_collect = port_metrics_collect, }; static struct gr_module port_module = { diff --git a/modules/infra/control/port_test.c b/modules/infra/control/port_test.c index 01a1afe2a..a5cbdfaab 100644 --- a/modules/infra/control/port_test.c +++ b/modules/infra/control/port_test.c @@ -43,6 +43,9 @@ mock_func(int, __wrap_rte_eth_dev_set_mc_addr_list(uint16_t, struct rte_ether_ad mock_func(int, __wrap_rte_eth_promiscuous_disable(uint16_t)); mock_func(int, __wrap_rte_eth_promiscuous_enable(uint16_t)); mock_func(int, __wrap_rte_eth_promiscuous_get(uint16_t)); +void gr_metrics_ctx_init(struct gr_metrics_ctx *, struct gr_metrics_writer *, ...) { } +void gr_metrics_labels_add(struct gr_metrics_ctx *, ...) 
{ } +void gr_metric_emit(struct gr_metrics_ctx *, const struct gr_metric *, uint64_t) { } // test harness init static const struct rte_ether_addr default_mac = {{0x02, 0xf0, 0x00, 0xb4, 0x47, 0x01}}; diff --git a/modules/infra/control/worker_test.c b/modules/infra/control/worker_test.c index b28f07e68..6ca9e4b8f 100644 --- a/modules/infra/control/worker_test.c +++ b/modules/infra/control/worker_test.c @@ -34,6 +34,9 @@ struct gr_config gr_config; void gr_register_api_handler(struct gr_api_handler *) { } void gr_register_module(struct gr_module *) { } void iface_type_register(struct iface_type *) { } +void gr_metrics_ctx_init(struct gr_metrics_ctx *, struct gr_metrics_writer *, ...) { } +void gr_metrics_labels_add(struct gr_metrics_ctx *, ...) { } +void gr_metric_emit(struct gr_metrics_ctx *, const struct gr_metric *, uint64_t) { } void gr_event_push(uint32_t, const void *) { } mock_func(struct rte_mempool *, gr_pktmbuf_pool_get(int8_t, uint32_t)); void gr_pktmbuf_pool_release(struct rte_mempool *, uint32_t) { } From 24a47b440f86cae0218311573eedd6488dd869c4 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:17:37 +0100 Subject: [PATCH 11/16] metrics: export graph node metrics Register a metrics collector for rte_graph node statistics. Each node exposes packets processed, batch count and CPU cycles consumed. The stats are aggregated across all workers. 
Signed-off-by: Robin Jarry --- modules/infra/api/stats.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/modules/infra/api/stats.c b/modules/infra/api/stats.c index 237eb0294..680944ae0 100644 --- a/modules/infra/api/stats.c +++ b/modules/infra/api/stats.c @@ -257,8 +257,34 @@ static struct gr_api_handler iface_stats_get_handler = { .callback = iface_stats_get, }; +METRIC_COUNTER(m_packets, "node_packets", "Number of packets processed by a node."); +METRIC_COUNTER(m_batches, "node_batches", "Number of times a node was visited."); +METRIC_COUNTER(m_cycles, "node_cycles", "Number of cycles spent per node."); + +static void graph_metrics_collect(struct gr_metrics_writer *w) { + gr_vec struct gr_infra_stat *stats = worker_dump_stats(UINT16_MAX); + struct gr_metrics_ctx ctx; + + gr_vec_foreach_ref (const struct gr_infra_stat *s, stats) { + if (skip_stat(s, 0)) + continue; + gr_metrics_ctx_init(&ctx, w, "name", s->name, NULL); + gr_metric_emit(&ctx, &m_packets, s->packets); + gr_metric_emit(&ctx, &m_batches, s->batches); + gr_metric_emit(&ctx, &m_cycles, s->cycles); + } + + gr_vec_free(stats); +} + +static struct gr_metrics_collector graph_collector = { + .name = "graph", + .collect = graph_metrics_collect, +}; + RTE_INIT(infra_stats_init) { gr_register_api_handler(&stats_get_handler); gr_register_api_handler(&stats_reset_handler); gr_register_api_handler(&iface_stats_get_handler); + gr_metrics_register(&graph_collector); } From 4d0a1bcdc6209c4ed3ffe3df07798b424d4ccd36 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:17:45 +0100 Subject: [PATCH 12/16] metrics: export CPU cycles metrics Register a metrics collector for worker CPU utilization. Each worker exports idle cycles and busy cycles with cpu and numa labels to allow computing utilization percentages. 
Signed-off-by: Robin Jarry --- modules/infra/api/stats.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/modules/infra/api/stats.c b/modules/infra/api/stats.c index 680944ae0..84aa6f689 100644 --- a/modules/infra/api/stats.c +++ b/modules/infra/api/stats.c @@ -282,9 +282,37 @@ static struct gr_metrics_collector graph_collector = { .collect = graph_metrics_collect, }; +METRIC_COUNTER(m_idle_cycles, "idle_cycles", "Number of idle CPU cycles."); +METRIC_COUNTER(m_busy_cycles, "busy_cycles", "Number of busy CPU cycles."); + +static void cpu_collect_metrics(struct gr_metrics_writer *w) { + struct gr_metrics_ctx ctx; + char cpu[16], numa[16]; + struct worker *worker; + + STAILQ_FOREACH (worker, &workers, next) { + const struct worker_stats *stats = atomic_load(&worker->stats); + if (stats == NULL) + continue; + + snprintf(cpu, sizeof(cpu), "%u", worker->cpu_id); + snprintf(numa, sizeof(numa), "%u", rte_lcore_to_socket_id(worker->lcore_id)); + + gr_metrics_ctx_init(&ctx, w, "cpu", cpu, "numa", numa, NULL); + gr_metric_emit(&ctx, &m_idle_cycles, stats->total_cycles - stats->busy_cycles); + gr_metric_emit(&ctx, &m_busy_cycles, stats->busy_cycles); + } +} + +static struct gr_metrics_collector cpu_collector = { + .name = "cpu", + .collect = cpu_collect_metrics, +}; + RTE_INIT(infra_stats_init) { gr_register_api_handler(&stats_get_handler); gr_register_api_handler(&stats_reset_handler); gr_register_api_handler(&iface_stats_get_handler); gr_metrics_register(&graph_collector); + gr_metrics_register(&cpu_collector); } From b0c3c8edf2094de97ae77ed3bf1695eddf522b6d Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:16:19 +0100 Subject: [PATCH 13/16] metrics: export nexthop counts Register a metrics collector for nexthop statistics. The collector exports the number of nexthops grouped by type (l3, blackhole, reject, group, etc.). 
Signed-off-by: Robin Jarry --- modules/infra/control/nexthop.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index b3a7102d3..a7302fc4f 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -720,6 +721,33 @@ static struct gr_module module = { .fini = nh_fini, }; +METRIC_GAUGE(m_count, "nexthop_count", "Number of nexthops by type."); + +static void count_types(struct nexthop *nh, void *priv) { + uint32_t *counts = priv; + counts[nh->type]++; +} + +static void nexthop_metrics_collect(struct gr_metrics_writer *w) { + uint32_t counts[UINT_NUM_VALUES(gr_nh_type_t)]; + struct gr_metrics_ctx ctx; + + memset(counts, 0, sizeof(counts)); + nexthop_iter(count_types, counts); + + for (unsigned t = 0; t < UINT_NUM_VALUES(gr_nh_type_t); t++) { + if (counts[t] == 0 || !nexthop_type_valid(t)) + continue; + gr_metrics_ctx_init(&ctx, w, "type", gr_nh_type_name(t), NULL); + gr_metric_emit(&ctx, &m_count, counts[t]); + } +} + +static struct gr_metrics_collector nexthop_collector = { + .name = "nexthop", + .collect = nexthop_metrics_collect, +}; + static void l3_free(struct nexthop *nh) { struct nexthop_info_l3 *l3 = nexthop_info_l3(nh); @@ -1025,6 +1053,7 @@ static struct nexthop_type_ops group_nh_ops = { RTE_INIT(init) { gr_event_register_serializer(&nh_serializer); gr_register_module(&module); + gr_metrics_register(&nexthop_collector); nexthop_type_ops_register(GR_NH_T_L3, &l3_nh_ops); nexthop_type_ops_register(GR_NH_T_GROUP, &group_nh_ops); } From 780a10325bc579ff8be7efc802f9d553dca870c7 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Mon, 12 Jan 2026 23:16:25 +0100 Subject: [PATCH 14/16] metrics: export rib4 route counts Register a metrics collector for the IPv4 RIB. The collector exports the number of routes grouped by VRF and origin (static, kernel, ospf, etc.). 
Signed-off-by: Robin Jarry --- modules/ip/control/route.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/modules/ip/control/route.c b/modules/ip/control/route.c index 3a7dfcd99..2c0ef4629 100644 --- a/modules/ip/control/route.c +++ b/modules/ip/control/route.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -441,6 +442,49 @@ void rib4_cleanup(struct nexthop *nh) { rib4_iter(GR_VRF_ID_ALL, rib4_cleanup_nh, nh); } +METRIC_GAUGE(m_routes, "rib4_routes", "Number of IPv4 routes by origin."); + +static void count_origin_cb( + uint16_t /*vrf_id*/, + ip4_addr_t, + uint8_t /*prefixlen*/, + gr_nh_origin_t origin, + const struct nexthop *, + void *priv +) { + uint32_t *counts = priv; + counts[origin]++; +} + +static void rib4_metrics_collect(struct gr_metrics_writer *w) { + uint32_t counts[UINT_NUM_VALUES(gr_nh_origin_t)]; + struct gr_metrics_ctx ctx; + char vrf[16]; + + for (uint16_t vrf_id = 0; vrf_id < GR_MAX_VRFS; vrf_id++) { + if (vrf_ribs[vrf_id] == NULL) + continue; + + snprintf(vrf, sizeof(vrf), "%u", vrf_id); + memset(counts, 0, sizeof(counts)); + rib4_iter(vrf_id, count_origin_cb, counts); + + for (unsigned o = 0; o < UINT_NUM_VALUES(gr_nh_origin_t); o++) { + if (counts[o] == 0 || !nexthop_origin_valid(o)) + continue; + gr_metrics_ctx_init( + &ctx, w, "vrf", vrf, "origin", gr_nh_origin_name(o), NULL + ); + gr_metric_emit(&ctx, &m_routes, counts[o]); + } + } +} + +static struct gr_metrics_collector rib4_collector = { + .name = "rib4", + .collect = rib4_metrics_collect, +}; + static int serialize_route4_event(const void *obj, void **buf) { const struct route4_event *priv = obj; struct gr_ip4_route *r; @@ -510,4 +554,5 @@ RTE_INIT(control_ip_init) { gr_register_api_handler(&route4_list_handler); gr_event_register_serializer(&route_serializer); gr_register_module(&route4_module); + gr_metrics_register(&rib4_collector); } From bf4f8e4c0fb9e1dd8637184f9721001504c935a7 Mon Sep 17 00:00:00 2001 
From: Robin Jarry Date: Mon, 12 Jan 2026 23:16:48 +0100 Subject: [PATCH 15/16] metrics: export rib6 route counts Register a metrics collector for the IPv6 RIB. The collector exports the number of routes grouped by VRF and origin (static, kernel, ospf, etc.). Signed-off-by: Robin Jarry --- modules/ip6/control/route.c | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/modules/ip6/control/route.c b/modules/ip6/control/route.c index b1000c50f..67f560405 100644 --- a/modules/ip6/control/route.c +++ b/modules/ip6/control/route.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -480,6 +481,49 @@ void rib6_cleanup(struct nexthop *nh) { rib6_iter(GR_VRF_ID_ALL, rib6_cleanup_nh, nh); } +METRIC_GAUGE(m_routes, "rib6_routes", "Number of IPv6 routes by origin."); + +static void count_origin_cb( + uint16_t /*vrf_id*/, + const struct rte_ipv6_addr *, + uint8_t /*prefixlen*/, + gr_nh_origin_t origin, + const struct nexthop *, + void *priv +) { + uint32_t *counts = priv; + counts[origin]++; +} + +static void rib6_metrics_collect(struct gr_metrics_writer *w) { + uint32_t counts[UINT_NUM_VALUES(gr_nh_origin_t)]; + struct gr_metrics_ctx ctx; + char vrf[16]; + + for (uint16_t vrf_id = 0; vrf_id < GR_MAX_VRFS; vrf_id++) { + if (vrf_ribs[vrf_id] == NULL) + continue; + + snprintf(vrf, sizeof(vrf), "%u", vrf_id); + memset(counts, 0, sizeof(counts)); + rib6_iter(vrf_id, count_origin_cb, counts); + + for (unsigned o = 0; o < UINT_NUM_VALUES(gr_nh_origin_t); o++) { + if (counts[o] == 0 || !nexthop_origin_valid(o)) + continue; + gr_metrics_ctx_init( + &ctx, w, "vrf", vrf, "origin", gr_nh_origin_name(o), NULL + ); + gr_metric_emit(&ctx, &m_routes, counts[o]); + } + } +} + +static struct gr_metrics_collector rib6_collector = { + .name = "rib6", + .collect = rib6_metrics_collect, +}; + static int serialize_route6_event(const void *obj, void **buf) { const struct route6_event *priv = obj; struct gr_ip6_route *r; @@ -549,4 
+593,5 @@ RTE_INIT(control_ip_init) { gr_register_api_handler(&route6_list_handler); gr_event_register_serializer(&route6_serializer); gr_register_module(&route6_module); + gr_metrics_register(&rib6_collector); } From 14f43e855d9f6f2010d92d7ab51748c1b74c1723 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 7 Jan 2026 16:59:38 +0100 Subject: [PATCH 16/16] smoke: add metrics test Configure loopback in the test namespace to allow the metrics exporter to bind its HTTP socket. Add a new test that configures various interfaces, nexthops, addresses and routes. Fetch all metrics via HTTP and verify nothing failed. Signed-off-by: Robin Jarry --- smoke/_init.sh | 5 +++++ smoke/metrics_test.sh | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100755 smoke/metrics_test.sh diff --git a/smoke/_init.sh b/smoke/_init.sh index 9b0f97c3a..11e85e42e 100644 --- a/smoke/_init.sh +++ b/smoke/_init.sh @@ -9,6 +9,11 @@ if [ -z "$(ip netns identify)" ]; then exec ip netns exec grout "$0" "$@" fi +ip link set lo up +if ! ip -o addr show dev lo | grep -qF 'inet 127.0.0.1'; then + ip addr add 127.0.0.1/8 dev lo +fi + : "${test_frr:=false}" if [ -n "$ZEBRA_DEBUG_DPLANE_GROUT" ]; then diff --git a/smoke/metrics_test.sh b/smoke/metrics_test.sh new file mode 100755 index 000000000..b3138ce27 --- /dev/null +++ b/smoke/metrics_test.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +. $(dirname $0)/_init.sh + +# configure a bunch of stuff +grcli -xe <