Skip to content

Commit 311add2

Browse files
authored
Add FRR failed route check in route_check.py (#4119)
What I did: The existing check detects routes with offload False or without offload. That captures queued routes, because a queued route doesn’t have offload, but it can’t detect rejected routes, because for a rejected route the offload is True. So, we add a new check on the value of the key failed in each route entry, which covers both rejected and queued routes. This helps to detect rejected and queued routes on the device. How I did it: Append the prefix of each failed route to a failed list; if the list is not empty, the script prints an error message to syslog. # Check for failed state if entry.get('failed', False): failed_rt.append(route_prefix) How to verify it: For rejected route: The output of route_check.py Some routes have failed state in FRR : ['0.0.0.0/0', '192.168.128.0/25', '192.168.128.128/25', '192.168.136.0/25', '192.168.136.128/25', '192.168.144.0/25', '192.168.144.128/25', '192.168.152.0/25', '192.168.152.128/25', '192.168.160.0/25', '192.168.160.128/25', '192.168.168.0/25', '192.168.168.128/25', '192.168.176.0/25', '192.168.176.128/25', '192.168.184.0/25', '192.168.184.128/25', '192.168.192.0/25', '192.168.192.128/25', '192.168.200.0/25', '192.168.200.128/25', '192.168.208.0/25', '192.168.208.128/25', '192.168.216.0/25', '192.168.216.128/25', '192.168.224.0/25', '192.168.224.128/25', '192.168.232.0/25', '192.168.232.128/25', '192.168.240.0/25', '192.168.240.128/25', '192.168.248.0/25', '192.168.248.128/25', '192.169.0.0/25', '192.169.0.128/25', '192.169.104.0/25', '192.169.104.128/25', '192.169.112.0/25', '192.169.112.128/25', '192.169.120.0/25', '192.169.120.128/25', '192.169.128.0/25', '192.169.128.128/25', '192.169.136.0/25', '192.169.136.128/25', '192.169.144.0/25', '192.16 Failure results: {{ "": { "failed_FRR_routes": [ "0.0.0.0/0", "192.168.128.0/25", "192.168.128.128/25", "192.168.136.0/25", "192.168.136.128/25", "192.168.144.0/25", "192.168.144.128/25", "192.168.152.0/25", "192.168.152.128/25", "192.168.160.0/25", "192.168.160.128/25", "192.168.168.0/25", 
"192.168.168.128/25", "192.168.176.0/25", "192.168.176.128/25", "192.168.184.0/25", "192.168.184.128/25", "192.168.192.0/25", "192.168.192.128/25", "192.168.200.0/25", "192.168.200.128/25", "192.168.208.0/25", "192.168.208.128/25", "192.168.216.0/25", "192.168.216.128/25", "192.168.224.0/25", "192.168.224.128/25", "192.168.232.0/25", "192.168.232 Failed. Look at reported mismatches above For rejected route: the output of route_check.py Some routes have failed state in FRR : ['0.0.0.0/0', '192.168.128.0/25', '192.168.128.128/25', '192.168.136.0/25', '192.168.136.128/25', '192.168.144.0/25', '192.168.144.128/25', '192.168.152.0/25', '192.168.152.128/25', '192.168.160.0/25', '192.168.160.128/25', '192.168.168.0/25', '192.168.168.128/25', '192.168.176.0/25', '192.168.176.128/25', '192.168.184.0/25', '192.168.184.128/25', '192.168.192.0/25', '192.168.192.128/25', '192.168.200.0/25', '192.168.200.128/25', '192.168.208.0/25', '192.168.208.128/25', '192.168.216.0/25', '192.168.216.128/25', '192.168.224.0/25', '192.168.224.128/25', '192.168.232.0/25', '192.168.232.128/25', '192.168.240.0/25', '192.168.240.128/25', '192.168.248.0/25', '192.168.248.128/25', '192.169.0.0/25', '192.169.0.128/25', '192.169.104.0/25', '192.169.104.128/25', '192.169.112.0/25', '192.169.112.128/25', '192.169.120.0/25', '192.169.120.128/25', '192.169.128.0/25', '192.169.128.128/25', '192.169.136.0/25', '192.169.136.128/25', '192.169.144.0/25', '192.16 Failure results: {{ "": { "missed_FRR_routes": [ { "destSelected": true, "distance": 20, "failed": true, "installedNexthopGroupId": 39146, "internalFlags": 8, "internalNextHopActiveNum": 4, "internalNextHopNum": 4, "internalStatus": 168, "metric": 0, "nexthopGroupId": 39146, "nexthops": [ { "active": true, "afi": "ipv4", "fib": true, "flags": 3, "interfaceIndex": 6, "interfaceName": "PortChannel102", "ip": "10.0.0.1", "rmapSource": "10.1.0.32", "weight": 1 }, { "active": true, Failed. Look at reported mismatches above
1 parent 0282c25 commit 311add2

File tree

2 files changed

+367
-5
lines changed

2 files changed

+367
-5
lines changed

scripts/route_check.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -676,9 +676,10 @@ def check_frr_pending_routes(namespace):
676676
retries = FRR_CHECK_RETRIES
677677
for i in range(retries):
678678
missed_rt = []
679+
failed_rt = []
679680
frr_routes = get_frr_routes_parallel(namespace)
680681

681-
for _, entries in frr_routes.items():
682+
for route_prefix, entries in frr_routes.items():
682683
for entry in entries:
683684
if entry['protocol'] in ('connected', 'kernel', 'static'):
684685
continue
@@ -695,12 +696,16 @@ def check_frr_pending_routes(namespace):
695696
if not entry.get('offloaded', False):
696697
missed_rt.append(entry)
697698

698-
if not missed_rt:
699+
if entry.get('failed', False):
700+
failed_rt.append(route_prefix)
701+
702+
if not missed_rt and not failed_rt:
699703
break
700704

701705
time.sleep(FRR_WAIT_TIME)
702-
print_message(syslog.LOG_DEBUG, "FRR missed routes: {}".format(missed_rt, indent=4))
703-
return missed_rt
706+
print_message(syslog.LOG_DEBUG, "FRR missed routes: {}".format(json.dumps(missed_rt, indent=4)))
707+
print_message(syslog.LOG_DEBUG, "FRR failed routes: {}".format(json.dumps(failed_rt, indent=4)))
708+
return missed_rt, failed_rt
704709

705710

706711
def mitigate_installed_not_offloaded_frr_routes(namespace, missed_frr_rt, rt_appl):
@@ -837,6 +842,7 @@ def check_routes_for_namespace(namespace):
837842
rt_appl_miss = []
838843
rt_asic_miss = []
839844
rt_frr_miss = []
845+
rt_frr_failed = []
840846

841847
selector, subs, rt_asic = get_asicdb_routes(namespace)
842848

@@ -890,17 +896,23 @@ def check_routes_for_namespace(namespace):
890896
if rt_asic_miss:
891897
results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
892898

893-
rt_frr_miss = check_frr_pending_routes(namespace)
899+
rt_frr_miss, rt_frr_failed = check_frr_pending_routes(namespace)
894900

895901
if rt_frr_miss:
896902
results["missed_FRR_routes"] = rt_frr_miss
897903

904+
if rt_frr_failed:
905+
results["failed_FRR_routes"] = rt_frr_failed
906+
898907
if results:
899908
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
900909
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \
901910
but all routes in APPL_DB and ASIC_DB are in sync".format(namespace))
902911
if is_suppress_fib_pending_enabled(namespace):
903912
mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl)
913+
if rt_frr_failed:
914+
print_message(syslog.LOG_ERR, "Some routes have failed state in FRR {} \
915+
: {}".format(namespace, rt_frr_failed))
904916

905917
return results, adds, deletes
906918

0 commit comments

Comments
 (0)