From 7e0ba68b48405d7b54d760bf56265a59819efd63 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 28 Mar 2019 21:49:04 +0800 Subject: [PATCH 001/218] [lag_rate] Restore lag setting on VMs in case of failure in test (#839) The existing script does not restore lag rate setting on VMs in case of failure. This improvement is to restore lag setting on VMs if the testing failed. Signed-off-by: Xin Wang --- .../test/tasks/single_lag_lacp_rate_test.yml | 83 +++++++++++-------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/ansible/roles/test/tasks/single_lag_lacp_rate_test.yml b/ansible/roles/test/tasks/single_lag_lacp_rate_test.yml index f6c27937d81..7785437dc67 100644 --- a/ansible/roles/test/tasks/single_lag_lacp_rate_test.yml +++ b/ansible/roles/test/tasks/single_lag_lacp_rate_test.yml @@ -47,36 +47,53 @@ neighbor_lag_intfs: "{{ neighbor_lag_intfs }} + [ '{{ vm_neighbors[item]['port'] }}' ]" with_items: "{{ po_interfaces }}" -# make sure portchannel peer rate is set to fast -- name: make sure all lag members on VM are set to fast - action: apswitch template=neighbor_lag_rate_fast.j2 - args: - host: "{{peer_host}}" - login: "{{switch_login[hwsku_map[peer_hwsku]]}}" - connection: switch - -- pause: - seconds: 5 - -- name: test lacp packet sending rate is 1 seconds - include: lag_lacp_timing_test.yml - vars: - vm_name: "{{ peer_device }}" - lacp_timer: 1 - -# make sure portchannel peer rate is set to slow -- name: make sure all lag members on VM are set to slow - action: apswitch template=neighbor_lag_rate_slow.j2 - args: - host: "{{peer_host}}" - login: "{{switch_login[hwsku_map[peer_hwsku]]}}" - connection: switch - -- pause: - seconds: 5 - -- name: test lacp packet sending rate is 30 seconds - include: lag_lacp_timing_test.yml - vars: - vm_name: "{{ peer_device }}" - lacp_timer: 30 +- block: + # make sure portchannel peer rate is set to fast + - name: make sure all lag members on VM are set to fast + action: apswitch template=neighbor_lag_rate_fast.j2 + 
args: + host: "{{peer_host}}" + login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + connection: switch + + - set_fact: + lag_rate_current_setting: "fast" + + - pause: + seconds: 5 + + - name: test lacp packet sending rate is 1 seconds + include: lag_lacp_timing_test.yml + vars: + vm_name: "{{ peer_device }}" + lacp_timer: 1 + + # make sure portchannel peer rate is set to slow + - name: make sure all lag members on VM are set to slow + action: apswitch template=neighbor_lag_rate_slow.j2 + args: + host: "{{peer_host}}" + login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + connection: switch + + - set_fact: + lag_rate_current_setting: "slow" + + - pause: + seconds: 5 + + - name: test lacp packet sending rate is 30 seconds + include: lag_lacp_timing_test.yml + vars: + vm_name: "{{ peer_device }}" + lacp_timer: 30 + + always: + - name: Restore lag rate setting on VM in case of failure + action: apswitch template=neighbor_lag_rate_slow.j2 + args: + host: "{{peer_host}}" + login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + timeout: 300 + connection: switch + when: "lag_rate_current_setting is defined and lag_rate_current_setting == 'fast'" From d7664edeac30865a731413047f58b0dc0d7ccc60 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Thu, 28 Mar 2019 16:36:33 +0200 Subject: [PATCH 002/218] [acltb] Apply BGP ACL forward rules in acltb_test_rules_part_1.json (#842) After applying acltb_test_rules_part_1.json BGP sessions may go down before we apply acltb_test_rules_part_2.json (which had BGP ACL forward rules); This results in BGP flap during ptf test run; It is safer to apply BGP ACL forward rules first to avoid BGP flapping. 
Signed-off-by: Stepan Blyschak --- .../tasks/acl/acltb_test_rules_part_1.json | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ansible/roles/test/tasks/acl/acltb_test_rules_part_1.json b/ansible/roles/test/tasks/acl/acltb_test_rules_part_1.json index 29c6b02d034..1687fbda9cf 100644 --- a/ansible/roles/test/tasks/acl/acltb_test_rules_part_1.json +++ b/ansible/roles/test/tasks/acl/acltb_test_rules_part_1.json @@ -96,6 +96,36 @@ "source-ip-address": "10.0.0.2/32" } } + }, + "15": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 27 + }, + "transport": { + "config": { + "source-port": "179" + } + } + }, + "16": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 28 + }, + "transport": { + "config": { + "destination-port": "179" + } + } } } } From f266d7e92fd5c4b5e64c38cee9affd0f18b9ce88 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 29 Mar 2019 00:44:49 +0800 Subject: [PATCH 003/218] [logAnalyzer] Fix the issue that variables are not overridden as expected (#844) PR #831 does not fully fix the issue introduced by PR #822. Ansible's include_vars module could not override variable value previous defined by set_fact. Variables in vars/run_config_test_vars.yml may still have old value. The change is to avoid using include_vars. The variables defined in run_config_test_vars.yml are moved into script run_command_with_log_analyzer.yml. The vars files are deleted. The same change is made to other scripts using the same pattern. 
Signed-off-by: Xin Wang --- .../tasks/acl/acl_traffic_test/run_ping_test.yml | 9 ++++++--- .../tasks/acl/acl_traffic_test/run_ptf_test.yml | 9 ++++++--- .../test/tasks/run_command_with_log_analyzer.yml | 13 ++++++++++--- ansible/roles/test/tasks/run_loganalyzer.yml | 13 ++++++++++--- ansible/vars/run_config_test_vars.yml | 13 ------------- ansible/vars/run_loganalyzer_vars.yml | 15 --------------- ansible/vars/run_ping_test_vars.yml | 7 ------- ansible/vars/run_ptf_test_vars.yml | 7 ------- 8 files changed, 32 insertions(+), 54 deletions(-) delete mode 100644 ansible/vars/run_config_test_vars.yml delete mode 100644 ansible/vars/run_loganalyzer_vars.yml delete mode 100644 ansible/vars/run_ping_test_vars.yml delete mode 100644 ansible/vars/run_ptf_test_vars.yml diff --git a/ansible/roles/test/tasks/acl/acl_traffic_test/run_ping_test.yml b/ansible/roles/test/tasks/acl/acl_traffic_test/run_ping_test.yml index 33960998e7c..01ea8dba5ee 100644 --- a/ansible/roles/test/tasks/acl/acl_traffic_test/run_ping_test.yml +++ b/ansible/roles/test/tasks/acl/acl_traffic_test/run_ping_test.yml @@ -2,10 +2,13 @@ # Execute ping and check the log. 
#----------------------------------------- -- name: Get an unique timestamp to feed to testname_unique +- name: Initialize some variables for loganalyzer set_fact: - unique_timestamp: "{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" -- include_vars: "vars/run_ping_test_vars.yml" + testname_unique: "{{ testname }}.{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" +- set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + summary_file: "summary.loganalysis.{{ testname_unique }}.log" + result_file: "result.loganalysis.{{ testname_unique }}.log" - name: Start log analyser include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml diff --git a/ansible/roles/test/tasks/acl/acl_traffic_test/run_ptf_test.yml b/ansible/roles/test/tasks/acl/acl_traffic_test/run_ptf_test.yml index 43afe816056..dc8436a49dd 100644 --- a/ansible/roles/test/tasks/acl/acl_traffic_test/run_ptf_test.yml +++ b/ansible/roles/test/tasks/acl/acl_traffic_test/run_ptf_test.yml @@ -2,10 +2,13 @@ # Send some TCP packets. #----------------------------------------- -- name: Get an unique timestamp to feed to testname_unique +- name: Initialize some variables for loganalyzer set_fact: - unique_timestamp: "{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" -- include_vars: "vars/run_ptf_test_vars.yml" + testname_unique: "{{ testname }}.{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" +- set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + summary_file: "summary.loganalysis.{{ testname_unique }}.log" + result_file: "result.loganalysis.{{ testname_unique }}.log" - block: - name: Execute ping from host to switch to validate link diff --git a/ansible/roles/test/tasks/run_command_with_log_analyzer.yml b/ansible/roles/test/tasks/run_command_with_log_analyzer.yml index 23d884b8dd4..a9e5bd169f9 100644 --- a/ansible/roles/test/tasks/run_command_with_log_analyzer.yml +++ b/ansible/roles/test/tasks/run_command_with_log_analyzer.yml @@ -5,10 +5,17 @@ # 3) Run cleanup if needed. 
#----------------------------------------- -- name: Get an unique timestamp to feed to testname_unique +- name: Initialize some variables for loganalyzer set_fact: - unique_timestamp: "{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" -- include_vars: "vars/run_config_test_vars.yml" + testname_unique: "{{ testname }}.{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" +- set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + loganalyzer_init: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + loganalyzer_analyze: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + match_file: loganalyzer_common_match.txt + ignore_file: loganalyzer_common_ignore.txt + summary_file: summary.loganalysis.{{ testname_unique }}.log + result_file: result.loganalysis.{{ testname_unique }}.log - block: - name: Initialize loganalizer. Put start marker to log file. diff --git a/ansible/roles/test/tasks/run_loganalyzer.yml b/ansible/roles/test/tasks/run_loganalyzer.yml index d0b8509004a..125c058b9a7 100644 --- a/ansible/roles/test/tasks/run_loganalyzer.yml +++ b/ansible/roles/test/tasks/run_loganalyzer.yml @@ -3,10 +3,17 @@ # or analyze-phase of loganalyzer. #----------------------------------------- -- name: Get an unique timestamp to feed to testname_unique +- name: Initialize some variables for loganalyzer set_fact: - unique_timestamp: "{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" -- include_vars: "vars/run_loganalyzer_vars.yml" + testname_unique: "{{ testname }}.{{ lookup('pipe','date +%Y-%m-%d-%H:%M:%S') }}" +- set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + loganalyzer_init: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + loganalyzer_analyze: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + match_file: loganalyzer_common_match.txt + ignore_file: loganalyzer_common_ignore.txt + summary_file: summary.loganalysis.{{ testname_unique }}.log + result_file: result.loganalysis.{{ testname_unique }}.log - name: Initialize loganalizer. 
Put start marker to log file. include: "{{ loganalyzer_init }}" diff --git a/ansible/vars/run_config_test_vars.yml b/ansible/vars/run_config_test_vars.yml deleted file mode 100644 index 686892092d4..00000000000 --- a/ansible/vars/run_config_test_vars.yml +++ /dev/null @@ -1,13 +0,0 @@ ---- - -testname_unique: "{{ testname }}.{{ unique_timestamp }}" - -test_out_dir: "{{ out_dir }}/{{ testname_unique }}" -loganalyzer_init: roles/test/files/tools/loganalyzer/loganalyzer_init.yml -loganalyzer_analyze: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml - -match_file: loganalyzer_common_match.txt -ignore_file: loganalyzer_common_ignore.txt - -summary_file: summary.loganalysis.{{ testname_unique }}.log -result_file: result.loganalysis.{{ testname_unique }}.log diff --git a/ansible/vars/run_loganalyzer_vars.yml b/ansible/vars/run_loganalyzer_vars.yml deleted file mode 100644 index e3df78a571e..00000000000 --- a/ansible/vars/run_loganalyzer_vars.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- - -testname_unique: "{{ testname }}.{{ unique_timestamp }}" - -test_out_dir: "{{ out_dir }}/{{ testname_unique }}" -loganalyzer_init: roles/test/files/tools/loganalyzer/loganalyzer_init.yml -loganalyzer_analyze: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml - -match_file: loganalyzer_common_match.txt -ignore_file: loganalyzer_common_ignore.txt - -summary_file: summary.loganalysis.{{ testname_unique }}.log -result_file: result.loganalysis.{{ testname_unique }}.log - -run_analyze_and_check: "roles/test/tasks/run_analyze_and_check.yml" diff --git a/ansible/vars/run_ping_test_vars.yml b/ansible/vars/run_ping_test_vars.yml deleted file mode 100644 index a4dd27773c4..00000000000 --- a/ansible/vars/run_ping_test_vars.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -testname_unique: "{{ testname }}.{{ unique_timestamp }}" - -test_out_dir: "{{ out_dir }}/{{ testname_unique }}" -summary_file: "summary.loganalysis.{{ testname_unique }}.log" -result_file: "result.loganalysis.{{ 
testname_unique }}.log" diff --git a/ansible/vars/run_ptf_test_vars.yml b/ansible/vars/run_ptf_test_vars.yml deleted file mode 100644 index a4dd27773c4..00000000000 --- a/ansible/vars/run_ptf_test_vars.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -testname_unique: "{{ testname }}.{{ unique_timestamp }}" - -test_out_dir: "{{ out_dir }}/{{ testname_unique }}" -summary_file: "summary.loganalysis.{{ testname_unique }}.log" -result_file: "result.loganalysis.{{ testname_unique }}.log" From e060b4162c443b0cb1c9f3187d846337e248127e Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 1 Apr 2019 23:37:28 +0800 Subject: [PATCH 004/218] [docker] Use recommended CMD for getting docker GPG key (#843) If use apt_key module for getting docker official GPG key, there would be cert validation issue. Replace the apt_key module with 'curl' command recommended on docker official documentation site. --- ansible/roles/vm_set/tasks/docker.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ansible/roles/vm_set/tasks/docker.yml b/ansible/roles/vm_set/tasks/docker.yml index bec6ba2bd6d..c6936bf2882 100644 --- a/ansible/roles/vm_set/tasks/docker.yml +++ b/ansible/roles/vm_set/tasks/docker.yml @@ -20,9 +20,7 @@ when: host_distribution_version.stdout == "18.04" - name: Add docker official GPG key - apt_key: - url: https://download.docker.com/linux/ubuntu/gpg - state: present + shell: "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -" become: yes environment: "{{ proxy_env | default({}) }}" From 9d54f431df43965f46e4bf8d340d16791e7c00f8 Mon Sep 17 00:00:00 2001 From: Qi Luo Date: Thu, 4 Apr 2019 19:27:34 -0700 Subject: [PATCH 005/218] Change warm-reboot time limit to 1 second (#855) --- ansible/roles/test/tasks/warm-reboot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/warm-reboot.yml b/ansible/roles/test/tasks/warm-reboot.yml index 0f457cd260b..7959e2fa18a 100644 --- a/ansible/roles/test/tasks/warm-reboot.yml 
+++ b/ansible/roles/test/tasks/warm-reboot.yml @@ -1,6 +1,6 @@ - name: set default reboot_limit in seconds set_fact: - reboot_limit: 0 + reboot_limit: 1 when: reboot_limit is not defined - name: Warm-reboot test From 467382f977c0c349922d6eb7de80b19865a5ed67 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 5 Apr 2019 12:25:34 +0800 Subject: [PATCH 006/218] [add-topo] Refresh ARP table of neighbors with new MAC address of new PTF container (#836) The PTF container will be destroyed if testbed-cli.sh remove-topo is executed. Run testbed-cli.sh add-topo will add a new PTF container. Usually the new PTF container will have a new MAC address. If add-topo is executed immediately after remove-topo, ARP table of neighbor switches and hosts may still have entry of the old PTF MAC address. This would cause connectivity issue to the new PTF container for a while until the old PTF MAC address is expired. This workaround is to send out an ARPing from the PTF container querying mgmt_gw after new PTF container is deployed and attached to network. The ARPing request will be broadcasted to all neighbors on same LAN and will refresh ARP table of neighbors with new MAC address of new PTF.
Signed-off-by: Xin Wang --- ansible/roles/vm_set/tasks/add_topo.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/roles/vm_set/tasks/add_topo.yml b/ansible/roles/vm_set/tasks/add_topo.yml index 292ca5cff4b..d80577c8999 100644 --- a/ansible/roles/vm_set/tasks/add_topo.yml +++ b/ansible/roles/vm_set/tasks/add_topo.yml @@ -43,3 +43,7 @@ fp_mtu: "{{ fp_mtu_size }}" max_fp_num: "{{ max_fp_num }}" become: yes + +- name: Send arp ping packet to gw for flusing the ARP table + command: docker exec -i ptf_{{ vm_set_name }} python -c "from scapy.all import *; arping('{{ mgmt_gw }}')" + become: yes From 42f4860bc3e741423ba253702a8b5d977abd49db Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Fri, 5 Apr 2019 10:56:36 -0700 Subject: [PATCH 007/218] [link state] look up topology name until the separator char (#857) Otherwise, if 2 systems have names where one is prefix of the other one, parsing of the shorter name will come up with 2 lines. Signed-off-by: Ying Xie --- ansible/linkstate/testbed_inv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/linkstate/testbed_inv.py b/ansible/linkstate/testbed_inv.py index 3d3540bf463..9baa13a1798 100755 --- a/ansible/linkstate/testbed_inv.py +++ b/ansible/linkstate/testbed_inv.py @@ -25,7 +25,7 @@ def read_config(): def parse_testbed_configuration(filename, target): with open(filename) as fp: for line in fp: - if line.startswith(target): + if line.startswith(target + ','): splitted_line = line.split(",") ptf_name = splitted_line[1] topo_name = splitted_line[2] From d5b5415e18b5aef5e1a8057ed3ef9f9ecf1fe9a7 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 9 Apr 2019 11:35:57 -0700 Subject: [PATCH 008/218] [test]: Change config interface command arguments order (#864) https://github.com/Azure/sonic-utilities/pull/504 This is to make all the commands backwards compatible Signed-off-by: Shu0T1an ChenG --- ansible/roles/test/tasks/arpall.yml | 8 ++++---- ansible/roles/test/tasks/config.yml | 8 
++++---- ansible/roles/test/tasks/neighbour-mac.yml | 2 +- ansible/roles/test/tasks/port_toggle.yml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/ansible/roles/test/tasks/arpall.yml b/ansible/roles/test/tasks/arpall.yml index 791f7882ac3..14e6cad97fb 100644 --- a/ansible/roles/test/tasks/arpall.yml +++ b/ansible/roles/test/tasks/arpall.yml @@ -35,7 +35,7 @@ when: po1 is defined - name: bring {{ intf1 }} up - shell: config interface {{ intf1 }} startup + shell: config interface startup {{ intf1 }} become: yes when: po1 is defined @@ -51,16 +51,16 @@ when: po2 is defined - name: bring {{ intf2 }} up - shell: config interface {{ intf2 }} startup + shell: config interface startup {{ intf2 }} become: yes when: po2 is defined - name: change SONiC DUT interface IP to test IP address - command: config interface {{ intf1 }} ip add 10.10.1.2/28 + command: config interface ip add {{ intf1 }} 10.10.1.2/28 become: yes - name: change SONiC DUT interface IP to test IP address - command: config interface {{ intf2 }} ip add 10.10.1.20/28 + command: config interface ip add {{ intf2 }} 10.10.1.20/28 become: yes - name: wait for interfaces to be up after removed from portchannel diff --git a/ansible/roles/test/tasks/config.yml b/ansible/roles/test/tasks/config.yml index 9314fae991a..001a82d475d 100644 --- a/ansible/roles/test/tasks/config.yml +++ b/ansible/roles/test/tasks/config.yml @@ -42,7 +42,7 @@ remove_portchannel_members: true - name: Step 2 Remove {{ portchannel_ip }} from {{ portchannel }} - shell: config interface {{ portchannel }} ip remove {{ portchannel_ip }}/31 + shell: config interface ip remove {{ portchannel }} {{ portchannel_ip }}/31 become: yes - set_fact: remove_portchannel_ip: true @@ -73,7 +73,7 @@ add_tmp_portchannel_members: true - name: Step 5 Add {{ portchannel_ip }} to {{ tmp_portchannel }} - shell: config interface {{ tmp_portchannel }} ip add {{ portchannel_ip }}/31 + shell: config interface ip add {{ tmp_portchannel }} {{ 
portchannel_ip }}/31 become: yes - set_fact: add_tmp_portchannel_ip: true @@ -97,7 +97,7 @@ always: - name: Remove {{ portchannel_ip }} from {{ tmp_portchannel }} - shell: config interface {{ tmp_portchannel }} ip remove {{ portchannel_ip }}/31 + shell: config interface ip remove {{ tmp_portchannel }} {{ portchannel_ip }}/31 become: yes when: add_tmp_portchannel_ip @@ -113,7 +113,7 @@ when: create_tmp_portchannel - name: Add {{ portchannel_ip }} to {{ portchannel }} - shell: config interface {{ portchannel }} ip add {{ portchannel_ip }}/31 + shell: config interface ip add {{ portchannel }} {{ portchannel_ip }}/31 become: yes when: remove_portchannel_ip diff --git a/ansible/roles/test/tasks/neighbour-mac.yml b/ansible/roles/test/tasks/neighbour-mac.yml index 4ed8e3cf9b5..fbf9d3e189c 100644 --- a/ansible/roles/test/tasks/neighbour-mac.yml +++ b/ansible/roles/test/tasks/neighbour-mac.yml @@ -12,7 +12,7 @@ - name: Change DUT interface IP to test IP address become: yes - command: config interface {{ dut_if }} ip add {{ dut_ip }}/24 + command: config interface ip add {{ dut_if }} {{ dut_ip }}/24 - name: Change host interface IP to test IP address become: yes diff --git a/ansible/roles/test/tasks/port_toggle.yml b/ansible/roles/test/tasks/port_toggle.yml index e77fc5da1f7..4e6f02b38fd 100644 --- a/ansible/roles/test/tasks/port_toggle.yml +++ b/ansible/roles/test/tasks/port_toggle.yml @@ -1,8 +1,8 @@ - name: build shell command string - debug: msg="PORTS={{minigraph_ports.keys() | join(' ')}}; for port in $PORTS; do config interface $port shutdown; done" + debug: msg="PORTS={{minigraph_ports.keys() | join(' ')}}; for port in $PORTS; do config interface shutdown $port; done" - name: turn off all ports on device - shell: PORTS="{{minigraph_ports.keys() | join(' ')}}"; for port in $PORTS; do config interface $port shutdown; done + shell: PORTS="{{minigraph_ports.keys() | join(' ')}}"; for port in $PORTS; do config interface shutdown $port; done become: yes - name: Get 
interface facts @@ -13,10 +13,10 @@ - always: - name: build shell command string - debug: msg="PORTS={{minigraph_ports.keys() | join(' ')}}; for port in $PORTS; do config interface $port startup; done" + debug: msg="PORTS={{minigraph_ports.keys() | join(' ')}}; for port in $PORTS; do config interface startup $port; done" - name: turn on all ports on device - shell: PORTS="{{minigraph_ports.keys() | join(' ')}}"; for port in $PORTS; do config interface $port startup; done + shell: PORTS="{{minigraph_ports.keys() | join(' ')}}"; for port in $PORTS; do config interface startup $port; done become: yes - name: wait 1 minute for ports to come up From 3808d9778529e5160208f0a6d38c74cb363ac954 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 10 Apr 2019 08:00:55 +0800 Subject: [PATCH 009/218] [ptf_runner] Save ptf log to script executing host in case of failure (#823) * [ptf_runner] Save ptf log to script executing host in case of failure The PTF log and pcap files are useful for debugging in case of PTF script failed. However, these files are in the PTF container and could be lost when the PTF container is re-deployed. This improvement is to save the log and pcap files to the script executing host when the PTF script is failed. Signed-off-by: Xin Wang * [ptf_runner] Add option for specifying whether to save ptf log The previous commit changed the default behavior. This change is to add an option for specifying whether to save ptf log in case of failure. For example: ansible-playbook .yml ... 
-e save_ptf_log=yes --- ansible/roles/test/tasks/ptf_runner.yml | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ansible/roles/test/tasks/ptf_runner.yml b/ansible/roles/test/tasks/ptf_runner.yml index 467d946d617..67a5f15d45b 100644 --- a/ansible/roles/test/tasks/ptf_runner.yml +++ b/ansible/roles/test/tasks/ptf_runner.yml @@ -51,5 +51,36 @@ - debug: var=out.stdout_lines +- name: Set default PTF log filename + set_fact: + ptf_log_file: "/root/ptf.log" + ptf_log_file_param_index: "{{ out.cmd.find('--log-file') }}" + +- name: Parse custom log filename specified in PTF command + set_fact: + ptf_log_file: "{{ out.cmd[ptf_log_file_param_index|int:].split(' ')[1] }}" + when: ptf_log_file_param_index|int >= 0 + +- name: Set PTF pcap filename + set_fact: + ptf_pcap_file: "{{ ptf_log_file | replace('.log', '.pcap') }}" + +- name : Fetch result files from switch to ansible machine + fetch: + src: "{{ item }}" + dest: "test/{{ inventory_hostname }}/ptf/{{ item | basename }}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" + flat: yes + with_items: + - "{{ ptf_log_file }}" + - "{{ ptf_pcap_file }}" + delegate_to: "{{ ptf_host }}" + when: out.rc != 0 and save_ptf_log is defined and save_ptf_log|bool == true + +- debug: msg="File {{ item }} saved to test/{{ inventory_hostname }}/ptf/" + with_items: + - "{{ptf_log_file}}" + - "{{ptf_pcap_file}}" + when: out.rc != 0 and save_ptf_log is defined and save_ptf_log|bool == true + - fail: msg="Failed test '{{ ptf_test_name }}'" when: out.rc != 0 From dc7d58f1cfeaa123a0ed7cd00abdbd629aea5587 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Wed, 10 Apr 2019 15:36:34 -0700 Subject: [PATCH 010/218] [pfc_wd] change pfc watchdog table name according to the sonic code change (#866) PFC_WD_TABLE --> PFC_WD Signed-off-by: Ying Xie --- ansible/roles/test/templates/pfc_wd_config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/templates/pfc_wd_config.j2 
b/ansible/roles/test/templates/pfc_wd_config.j2 index 3dc440256de..fb6ccd713f6 100644 --- a/ansible/roles/test/templates/pfc_wd_config.j2 +++ b/ansible/roles/test/templates/pfc_wd_config.j2 @@ -1,5 +1,5 @@ { - "PFC_WD_TABLE": { + "PFC_WD": { "{{ pfc_wd_interface_list }}": { "action": "{{ pfc_wd_action }}", "detection_time": "{{ pfc_wd_detection_time }}", From 0bc2acf40ac4c83660b25d06ffe964f17a0a7532 Mon Sep 17 00:00:00 2001 From: Qi Luo Date: Sat, 13 Apr 2019 09:49:11 -0700 Subject: [PATCH 011/218] Scrub credential in `docker pull` command line (#869) --- ansible/swap_syncd.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/swap_syncd.yml b/ansible/swap_syncd.yml index f7e1f3c215c..d460204c952 100644 --- a/ansible/swap_syncd.yml +++ b/ansible/swap_syncd.yml @@ -58,8 +58,10 @@ set_fact: sonic_image_version: "{{ result.stdout }}" + # Note: no_log requires passlib python library - name: Pull syncd-rpc docker from registry shell: docker login -u {{docker_registry_username}} -p {{docker_registry_password}} {{docker_registry_host}}; docker pull {{docker_registry_host}}/{{docker_rpc_image_name}}:{{sonic_image_version}} + no_log: true - name: Tag pulled images as syncd shell: docker tag {{docker_registry_host}}/{{docker_rpc_image_name}}:{{sonic_image_version}} {{docker_syncd_name}} From 336703ae660958a21aad3b2274da30416fc1a2cf Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 18 Apr 2019 16:46:17 +0000 Subject: [PATCH 012/218] [minigraph]: Fix minigraph parsing error on Mellanox-SN2700-D48C8 (#875) Signed-off-by: Qi Luo --- ansible/library/minigraph_facts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/library/minigraph_facts.py b/ansible/library/minigraph_facts.py index 7a19490eac5..18b28e242a2 100644 --- a/ansible/library/minigraph_facts.py +++ b/ansible/library/minigraph_facts.py @@ -478,7 +478,7 @@ def parse_xml(filename, hostname): s100G_ports = [x for x in range(24, 40, 4)] + [x for x in range(88, 104, 4)] for i in 
s50G_ports: - alias = "etp%d" % (i / 4 + 1) + "a" if i % 4 == 0 else "b" + alias = "etp%d" % (i / 4 + 1) + ("a" if i % 4 == 0 else "b") port_alias_map[alias] = "Ethernet%d" % i for i in s100G_ports: alias = "etp%d" % (i / 4 + 1) From 36d50c7a43520ea06e013a0caf4d9fcf25cdd3c6 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 18 Apr 2019 09:10:39 -0700 Subject: [PATCH 013/218] [fast/warm reboot] improve new image installation code (#877) * [fast/warm reboot] improve new image installation code - Allow new_sonic_image being defined as empty string. It causes skipping image installation. - Rename new_image_location to a generic name. - Display defined new image url. Signed-off-by: Ying Xie * [fast/warm reboot] allow DUT to stay in the warm/fast reboot target release This feature is needed in order to test upgrade path. Where we might upgrade from one version to another, and more. We want the system to stay in target release for next steps. Signed-off-by: Ying Xie * Address review comments, test issues and some minor touch-ups Signed-off-by: Ying Xie * [fast/warm reboot] add knob to clean up old images on DUT before warm/fast reboot When new image is specified for fast/warm reboot. The new image will be installed. However, if the specified image is already installed on the target DUT, then sonic_install will fail and fast/warm reboot will reboot into current image. Add a knob to cleanup old images so that the installing of new image will have a better chance to succeed.
Signed-off-by: Ying Xie * address review issue --- ansible/roles/test/tasks/advanced-reboot.yml | 36 ++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 40626e1f238..9643737f0a1 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -106,15 +106,29 @@ dest: /tmp/ports.json delegate_to: "{{ ptf_host }}" + - debug: msg="Defined new sonic image url is {{ new_sonic_image }}" + when: new_sonic_image is defined + + - set_fact: + stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" + cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" + - block: - name: Save image version shell: 'sonic_installer list | grep Current | cut -f2 -d " "' register: current_sonic_image become: true + when: not stay_in_target_image + + - name: Generate temp file name on target device + shell: mktemp + register: tempfile - set_fact: - new_image_location: '/tmp/new_sonic_image.bin' + new_image_location: '{{ tempfile.stdout }}' + + - debug: msg='Setting image file name to {{ new_image_location }}' - name: Download SONiC image. 
local_action: get_url url={{ new_sonic_image }} dest={{ new_image_location }} @@ -124,14 +138,24 @@ src: "{{ new_image_location }}" dest: "{{ new_image_location }}" - - name: Install a new SONiC image if requested - shell: sonic_installer install -y {{ new_image_location }} + - name: Cleanup sonic images that is not current and/or next + shell: sonic_installer cleanup -y become: true + when: cleanup_old_sonic_images + + - name: 'Setup restoring initial image {{ current_sonic_image }}' + shell: /bin/true + connection: local notify: - - restore current image - - reboot sonic + - restore current image + - reboot sonic + when: not stay_in_target_image - when: new_sonic_image is defined + - name: Installing new SONiC image + shell: sonic_installer install -y {{ new_image_location }} + become: true + + when: new_sonic_image | default('') | length > 0 - include: ptf_runner.yml vars: From 0a31f6b5e50fb1703ecca64ff01cb676f943790f Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 24 Apr 2019 22:38:01 +0800 Subject: [PATCH 014/218] [ntpd] Update matching pattern of 'ERR ntpd' in loganalyzer ignore files (#865) The ntpd may generate 'ERR ntpd' in syslog and caused unnecessary test case failure. Previous PR https://github.com/Azure/sonic-mgmt/pull/816 added a matching pattern of 'ERR ntpd' in loganalyzer ignore files to ignore the ntpd error messages. However, ntpd may generate two formats of error messages. The previously added matching pattern can only match one of the formats. This change is to update the pattern to match both of the formats.
Signed-off-by: Xin Wang --- .../test/files/tools/loganalyzer/loganalyzer_common_ignore.txt | 2 +- ansible/roles/test/tasks/acl/acltb_ignore_messages.txt | 2 +- ansible/roles/test/tasks/fib/fib_ignore_messages.txt | 2 +- ansible/roles/test/tasks/lag/lag_ignore_messages.txt | 2 +- .../test/tasks/pfc_wd/config_test/config_test_ignore_messages | 2 +- .../test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt index 2a562f47972..03bada03d8b 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt @@ -1 +1 @@ -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" diff --git a/ansible/roles/test/tasks/acl/acltb_ignore_messages.txt b/ansible/roles/test/tasks/acl/acltb_ignore_messages.txt index 2a562f47972..03bada03d8b 100644 --- a/ansible/roles/test/tasks/acl/acltb_ignore_messages.txt +++ b/ansible/roles/test/tasks/acl/acltb_ignore_messages.txt @@ -1 +1 @@ -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" diff --git a/ansible/roles/test/tasks/fib/fib_ignore_messages.txt b/ansible/roles/test/tasks/fib/fib_ignore_messages.txt index 2a562f47972..03bada03d8b 100644 --- a/ansible/roles/test/tasks/fib/fib_ignore_messages.txt +++ b/ansible/roles/test/tasks/fib/fib_ignore_messages.txt @@ -1 +1 @@ -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" diff --git a/ansible/roles/test/tasks/lag/lag_ignore_messages.txt b/ansible/roles/test/tasks/lag/lag_ignore_messages.txt index 2a562f47972..03bada03d8b 
100644 --- a/ansible/roles/test/tasks/lag/lag_ignore_messages.txt +++ b/ansible/roles/test/tasks/lag/lag_ignore_messages.txt @@ -1 +1 @@ -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" diff --git a/ansible/roles/test/tasks/pfc_wd/config_test/config_test_ignore_messages b/ansible/roles/test/tasks/pfc_wd/config_test/config_test_ignore_messages index b7b81fce8eb..b93ffa1530b 100644 --- a/ansible/roles/test/tasks/pfc_wd/config_test/config_test_ignore_messages +++ b/ansible/roles/test/tasks/pfc_wd/config_test/config_test_ignore_messages @@ -5,4 +5,4 @@ r, ".* Unknown.*" r, ".* SAI_STATUS_ATTR_NOT_SUPPORT.*" r, ".* snmp.*" r, ".* Trying to remove nonexisting queue from flex counter .*" -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages index effc6c88b5d..391d16ed990 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages @@ -6,4 +6,4 @@ r, ".* SAI_STATUS_ATTR_NOT_SUPPORT.*" r, ".* snmp.*" r, ".* Trying to remove nonexisting queue from flex counter .*" r, ".* SAI_STATUS_BUFFER_OVERFLOW" -r, ".* ERR ntpd.*: routing socket reports: No buffer space available" +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" From 8ac9a889a9b250376e68a3b940c2b173df1b03da Mon Sep 17 00:00:00 2001 From: Qi Luo Date: Sat, 27 Apr 2019 19:34:02 -0700 Subject: [PATCH 015/218] Add many testcases support to t0-56 (#885) * Add many testcases support to t0-56 * Fix bgp_speaker for t0-56 --- ansible/roles/test/files/ptftests/fib_test.py | 2 + ansible/roles/test/tasks/bgp_speaker.yml | 2 +- ansible/roles/test/tasks/shared-fib.yml | 4 +- 
ansible/roles/test/vars/testcases.yml | 38 +++++++++---------- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/ansible/roles/test/files/ptftests/fib_test.py b/ansible/roles/test/files/ptftests/fib_test.py index 5f11b96a9ff..a52199d8e5b 100644 --- a/ansible/roles/test/files/ptftests/fib_test.py +++ b/ansible/roles/test/files/ptftests/fib_test.py @@ -100,6 +100,8 @@ def setUp(self): self.src_ports = [0, 1, 4, 5, 16, 17, 20, 21, 34, 36, 37, 38, 39, 42, 44, 45, 46, 47, 50, 52, 53, 54, 55, 58, 60, 61, 62, 63] if self.test_params['testbed_type'] == 't0': self.src_ports = range(1, 25) + range(28, 32) + if self.test_params['testbed_type'] == 't0-56': + self.src_ports = [0, 1, 4, 5, 8, 9] + range(12, 18) + [20, 21, 24, 25, 28, 29, 32, 33, 36, 37] + range(40, 46) + [48, 49, 52, 53] if self.test_params['testbed_type'] == 't0-64': self.src_ports = range(0, 2) + range(4, 18) + range(20, 33) + range(36, 43) + range(48, 49) + range(52, 59) if self.test_params['testbed_type'] == 't0-116': diff --git a/ansible/roles/test/tasks/bgp_speaker.yml b/ansible/roles/test/tasks/bgp_speaker.yml index adfc5046227..2bb8e087fd0 100644 --- a/ansible/roles/test/tasks/bgp_speaker.yml +++ b/ansible/roles/test/tasks/bgp_speaker.yml @@ -7,7 +7,7 @@ when: (testbed_type is not defined or ptf_host is not defined) - fail: msg="Invalid testbed_type value '{{testbed_type}}'" - when: testbed_type not in ['t0', 't0-64', 't0-116'] + when: testbed_type not in testcases['bgp_speaker']['topologies'] - name: Gather minigraph facts about the device minigraph_facts: host={{inventory_hostname}} diff --git a/ansible/roles/test/tasks/shared-fib.yml b/ansible/roles/test/tasks/shared-fib.yml index 73aee0d19bb..87557248ea5 100644 --- a/ansible/roles/test/tasks/shared-fib.yml +++ b/ansible/roles/test/tasks/shared-fib.yml @@ -8,7 +8,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{testbed_type}} is invalid." 
- when: testbed_type not in ['t1-lag', 't1', 't1-64-lag', 't0', 't0-64', 't0-116'] + when: testbed_type not in testcases['fib']['topologies'] - include_vars: "vars/topo_{{testbed_type}}.yml" @@ -18,7 +18,7 @@ - name: Expand properties into props set_fact: props="{{configuration_properties['common']}}" - when: testbed_type in ['t0', 't0-64', 't0-116'] + when: testbed_type in ['t0', 't0-56', 't0-64', 't0-116'] - name: Expand ToR properties into props set_fact: props_tor="{{configuration_properties['tor']}}" diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 2f7c89e58cb..33e2319075a 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -26,7 +26,7 @@ testcases: bgp_speaker: filename: bgp_speaker.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: testbed_type: @@ -37,7 +37,7 @@ testcases: continuous_reboot: filename: continuous_reboot.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] copp: filename: copp.yml @@ -47,7 +47,7 @@ testcases: decap: filename: decap.yml - topologies: [t1, t1-lag, t1-64-lag, t0, t0-64, t0-116] + topologies: [t1, t1-lag, t1-64-lag, t0, t0-56, t0-64, t0-116] required_vars: ptf_host: testbed_type: @@ -55,7 +55,7 @@ testcases: dhcp_relay: filename: dhcp_relay.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: @@ -72,7 +72,7 @@ testcases: fast-reboot: filename: fast-reboot.yml - topologies: [t0, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: vm_hosts: @@ -86,7 +86,7 @@ testcases: fib: filename: simple-fib.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, 
t1-lag, t1-64-lag] required_vars: ptf_host: testbed_type: @@ -100,36 +100,36 @@ testcases: fdb: filename: fdb.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: testbed_type: dir_bcast: filename: dir_bcast.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: testbed_type: lag_2: filename: lag_2.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag] required_vars: ptf_host: testbed_type: lldp: filename: lldp.yml - topologies: [t0, t0-16, t0-64, t0-116, t0-64-32, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-56, t0-64, t0-116, t0-64-32, t1, t1-lag, t1-64-lag] link_flap: filename: link_flap.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] mem_check: filename: mem_check.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] mtu: filename: mtu.yml @@ -150,11 +150,11 @@ testcases: ntp: filename: ntp.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] pfc_wd: filename: pfc_wd.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] port_toggle: filename: port_toggle.yml @@ -166,7 +166,7 @@ testcases: reboot: filename: reboot.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] repeat_harness: filename: 
repeat_harness.yml @@ -186,19 +186,19 @@ testcases: sensors: filename: sensors_check.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] service_acl: filename: service_acl.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] snmp: filename: snmp.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] syslog: filename: syslog.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] vlan: filename: vlantb.yml From d1dd00bf612f385188b95bb28df71d5fa7b98cf0 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Tue, 30 Apr 2019 18:11:07 +0300 Subject: [PATCH 016/218] [advanced-reboot] improvements and enable CPU/VLAN ARP watchers during warm reboot (#890) * [advanced-reboot] move Arista class to seperate module Signed-off-by: Stepan Blyschak * [advanced-reboot] use lock to synchronize fast data plane and reachability_watcher threads Signed-off-by: Stepan Blyschak * [advanced-reboot] stabilize test when fast data plane send running * Apply a filter on socket before sending fast data plane IO * Save sniffed packets after the traffic test is done Signed-off-by: Stepan Blyschak * [advanced-reboot] refactor fast data plane generator code * reuse from_t1 and from_vlan_server generated packets in generate_bidirectional * use tcp instead ofudp in generate_bidirectional Signed-off-by: Stepan Blyschak * [advanced-reboot] add space back Signed-off-by: Stepan Blyschak --- .../test/files/ptftests/advanced-reboot.py | 677 +++++------------- 
ansible/roles/test/files/ptftests/arista.py | 386 ++++++++++ 2 files changed, 564 insertions(+), 499 deletions(-) create mode 100644 ansible/roles/test/files/ptftests/arista.py diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index e87d6ec0721..b8be0e89642 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -62,359 +62,9 @@ import pickle from operator import itemgetter import scapy.all as scapyall +import itertools - -class Arista(object): - DEBUG = False - def __init__(self, ip, queue, test_params, login='admin', password='123456'): - self.ip = ip - self.queue = queue - self.login = login - self.password = password - self.conn = None - self.hostname = None - self.v4_routes = [test_params['vlan_ip_range'], test_params['lo_prefix']] - self.v6_routes = [test_params['lo_v6_prefix']] - self.fails = set() - self.info = set() - self.min_bgp_gr_timeout = int(test_params['min_bgp_gr_timeout']) - - def __del__(self): - self.disconnect() - - def connect(self): - self.conn = paramiko.SSHClient() - self.conn.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - self.conn.connect(self.ip, username=self.login, password=self.password, allow_agent=False, look_for_keys=False) - self.shell = self.conn.invoke_shell() - - first_prompt = self.do_cmd(None, prompt = '>') - self.hostname = self.extract_hostname(first_prompt) - - self.do_cmd('enable') - self.do_cmd('terminal length 0') - - return self.shell - - def extract_hostname(self, first_prompt): - lines = first_prompt.split('\n') - prompt = lines[-1] - return prompt.strip().replace('>', '#') - - def do_cmd(self, cmd, prompt = None): - if prompt == None: - prompt = self.hostname - - if cmd is not None: - self.shell.send(cmd + '\n') - - input_buffer = '' - while prompt not in input_buffer: - input_buffer += self.shell.recv(16384) - - return input_buffer - - def disconnect(self): - if 
self.conn is not None: - self.conn.close() - self.conn = None - - return - - def run(self): - data = {} - debug_data = {} - run_once = False - log_first_line = None - quit_enabled = False - v4_routing_ok = False - v6_routing_ok = False - routing_works = True - self.connect() - - cur_time = time.time() - sample = {} - samples = {} - portchannel_output = self.do_cmd("show interfaces po1 | json") - portchannel_output = "\n".join(portchannel_output.split("\r\n")[1:-1]) - sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] - samples[cur_time] = sample - - while not (quit_enabled and v4_routing_ok and v6_routing_ok): - cmd = self.queue.get() - if cmd == 'quit': - quit_enabled = True - continue - cur_time = time.time() - info = {} - debug_info = {} - lacp_output = self.do_cmd('show lacp neighbor') - info['lacp'] = self.parse_lacp(lacp_output) - bgp_neig_output = self.do_cmd('show ip bgp neighbors') - info['bgp_neig'] = self.parse_bgp_neighbor(bgp_neig_output) - - bgp_route_v4_output = self.do_cmd('show ip route bgp | json') - v4_routing_ok = self.parse_bgp_route(bgp_route_v4_output, self.v4_routes) - info['bgp_route_v4'] = v4_routing_ok - - bgp_route_v6_output = self.do_cmd("show ipv6 route bgp | json") - v6_routing_ok = self.parse_bgp_route(bgp_route_v6_output, self.v6_routes) - info["bgp_route_v6"] = v6_routing_ok - - portchannel_output = self.do_cmd("show interfaces po1 | json") - portchannel_output = "\n".join(portchannel_output.split("\r\n")[1:-1]) - sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] - - if not run_once: - self.ipv4_gr_enabled, self.ipv6_gr_enabled, self.gr_timeout = self.parse_bgp_neighbor_once(bgp_neig_output) - if self.gr_timeout is not None: - log_first_line = "session_begins_%f" % cur_time - self.do_cmd("send log message %s" % log_first_line) - run_once = True - - data[cur_time] = info - 
samples[cur_time] = sample - if self.DEBUG: - debug_data[cur_time] = { - 'show lacp neighbor' : lacp_output, - 'show ip bgp neighbors' : bgp_neig_output, - 'show ip route bgp' : bgp_route_v4_output, - 'show ipv6 route bgp' : bgp_route_v6_output, - } - - attempts = 60 - for _ in range(attempts): - log_output = self.do_cmd("show log | begin %s" % log_first_line) - log_lines = log_output.split("\r\n")[1:-1] - log_data = self.parse_logs(log_lines) - if len(log_data) != 0: - break - time.sleep(1) # wait until logs are populated - - if len(log_data) == 0: - log_data['error'] = 'Incomplete output' - - self.disconnect() - - # save data for troubleshooting - with open("/tmp/%s.data.pickle" % self.ip, "w") as fp: - pickle.dump(data, fp) - - # save debug data for troubleshooting - if self.DEBUG: - with open("/tmp/%s.raw.pickle" % self.ip, "w") as fp: - pickle.dump(debug_data, fp) - with open("/tmp/%s.logging" % self.ip, "w") as fp: - fp.write("\n".join(log_lines)) - - self.check_gr_peer_status(data) - cli_data = {} - cli_data['lacp'] = self.check_series_status(data, "lacp", "LACP session") - cli_data['bgp_v4'] = self.check_series_status(data, "bgp_route_v4", "BGP v4 routes") - cli_data['bgp_v6'] = self.check_series_status(data, "bgp_route_v6", "BGP v6 routes") - cli_data['po'] = self.check_change_time(samples, "po_changetime", "PortChannel interface") - - route_timeout = log_data['route_timeout'] - cli_data['route_timeout'] = route_timeout - - # {'10.0.0.38': [(0, '4200065100)')], 'fc00::2d': [(0, '4200065100)')]} - for nei in route_timeout.keys(): - asn = route_timeout[nei][0][-1] - msg = 'BGP route GR timeout: neighbor %s (ASN %s' % (nei, asn) - self.fails.add(msg) - - return self.fails, self.info, cli_data, log_data - - def extract_from_logs(self, regexp, data): - raw_data = [] - result = defaultdict(list) - initial_time = -1 - re_compiled = re.compile(regexp) - for line in data: - m = re_compiled.match(line) - if not m: - continue - 
raw_data.append((datetime.datetime.strptime(m.group(1), "%b %d %X"), m.group(2), m.group(3))) - - if len(raw_data) > 0: - initial_time = raw_data[0][0] - for when, what, status in raw_data: - offset = (when - initial_time if when > initial_time else initial_time - when).seconds - result[what].append((offset, status)) - - return result, initial_time - - def parse_logs(self, data): - result = {} - bgp_r = r'^(\S+\s+\d+\s+\S+) \S+ Rib: %BGP-5-ADJCHANGE: peer (\S+) .+ (\S+)$' - result_bgp, initial_time_bgp = self.extract_from_logs(bgp_r, data) - if_r = r'^(\S+\s+\d+\s+\S+) \S+ Ebra: %LINEPROTO-5-UPDOWN: Line protocol on Interface (\S+), changed state to (\S+)$' - result_if, initial_time_if = self.extract_from_logs(if_r, data) - - route_r = r'^(\S+\s+\d+\s+\S+) \S+ Rib: %BGP-5-BGP_GRACEFUL_RESTART_TIMEOUT: Deleting stale routes from peer (\S+) .+ (\S+)$' - result_rt, initial_time_rt = self.extract_from_logs(route_r, data) - - result['route_timeout'] = result_rt - - if initial_time_bgp == -1 or initial_time_if == -1: - return result - - for events in result_bgp.values(): - if events[-1][1] != 'Established': - return result - - # first state is Idle, last state is Established - for events in result_bgp.values(): - if len(events) > 1: - assert(events[0][1] != 'Established') - - assert(events[-1][1] == 'Established') - - # first state is down, last state is up - for events in result_if.values(): - assert(events[0][1] == 'down') - assert(events[-1][1] == 'up') - - po_name = [ifname for ifname in result_if.keys() if 'Port-Channel' in ifname][0] - neigh_ipv4 = [neig_ip for neig_ip in result_bgp.keys() if '.' 
in neig_ip][0] - - result['PortChannel was down (seconds)'] = result_if[po_name][-1][0] - result_if[po_name][0][0] - for if_name in sorted(result_if.keys()): - result['Interface %s was down (times)' % if_name] = map(itemgetter(1), result_if[if_name]).count("down") - - for neig_ip in result_bgp.keys(): - key = "BGP IPv6 was down (seconds)" if ':' in neig_ip else "BGP IPv4 was down (seconds)" - result[key] = result_bgp[neig_ip][-1][0] - result_bgp[neig_ip][0][0] - - for neig_ip in result_bgp.keys(): - key = "BGP IPv6 was down (times)" if ':' in neig_ip else "BGP IPv4 was down (times)" - result[key] = map(itemgetter(1), result_bgp[neig_ip]).count("Idle") - - bgp_po_offset = (initial_time_if - initial_time_bgp if initial_time_if > initial_time_bgp else initial_time_bgp - initial_time_if).seconds - result['PortChannel went down after bgp session was down (seconds)'] = bgp_po_offset + result_if[po_name][0][0] - - for neig_ip in result_bgp.keys(): - key = "BGP IPv6 was gotten up after Po was up (seconds)" if ':' in neig_ip else "BGP IPv4 was gotten up after Po was up (seconds)" - result[key] = result_bgp[neig_ip][-1][0] - bgp_po_offset - result_if[po_name][-1][0] - - return result - - def parse_lacp(self, output): - return output.find('Bundled') != -1 - - def parse_bgp_neighbor_once(self, output): - is_gr_ipv4_enabled = False - is_gr_ipv6_enabled = False - restart_time = None - for line in output.split('\n'): - if ' Restart-time is' in line: - restart_time = int(line.replace(' Restart-time is ', '')) - continue - - if 'is enabled, Forwarding State is' in line: - if 'IPv6' in line: - is_gr_ipv6_enabled = True - elif 'IPv4' in line: - is_gr_ipv4_enabled = True - - return is_gr_ipv4_enabled, is_gr_ipv6_enabled, restart_time - - def parse_bgp_neighbor(self, output): - gr_active = None - gr_timer = None - for line in output.split('\n'): - if 'Restart timer is' in line: - gr_active = 'is active' in line - gr_timer = str(line[-9:-1]) - - return gr_active, gr_timer - - def 
parse_bgp_route(self, output, expects): - prefixes = set() - data = "\n".join(output.split("\r\n")[1:-1]) - obj = json.loads(data) - - if "vrfs" in obj and "default" in obj["vrfs"]: - obj = obj["vrfs"]["default"] - for prefix, attrs in obj["routes"].items(): - if "routeAction" not in attrs or attrs["routeAction"] != "forward": - continue - if all("Port-Channel" in via["interface"] for via in attrs["vias"]): - prefixes.add(prefix) - - return set(expects) == prefixes - - def check_gr_peer_status(self, output): - # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 - if not self.ipv4_gr_enabled: - self.fails.add("bgp ipv4 graceful restart is not enabled") - if not self.ipv6_gr_enabled: - pass # ToDo: - if self.gr_timeout < 120: # bgp graceful restart timeout less then 120 seconds - self.fails.add("bgp graceful restart timeout is less then 120 seconds") - - for when, other in sorted(output.items(), key = lambda x : x[0]): - gr_active, timer = other['bgp_neig'] - # wnen it's False, it's ok, wnen it's True, check that inactivity timer not less then self.min_bgp_gr_timeout seconds - if gr_active and datetime.datetime.strptime(timer, '%H:%M:%S') < datetime.datetime(1900, 1, 1, second = self.min_bgp_gr_timeout): - self.fails.add("graceful restart timer is almost finished. 
Less then %d seconds left" % self.min_bgp_gr_timeout) - - def check_series_status(self, output, entity, what): - # find how long anything was down - # Input parameter is a dictionary when:status - # constraints: - # entity must be down just once - # entity must be up when the test starts - # entity must be up when the test stops - - sorted_keys = sorted(output.keys()) - if not output[sorted_keys[0]][entity]: - self.fails.add("%s must be up when the test starts" % what) - return 0, 0 - if not output[sorted_keys[-1]][entity]: - self.fails.add("%s must be up when the test stops" % what) - return 0, 0 - - start = sorted_keys[0] - cur_state = True - res = defaultdict(list) - for when in sorted_keys[1:]: - if cur_state != output[when][entity]: - res[cur_state].append(when - start) - start = when - cur_state = output[when][entity] - res[cur_state].append(when - start) - - is_down_count = len(res[False]) - - if is_down_count > 1: - self.info.add("%s must be down just for once" % what) - - return is_down_count, sum(res[False]) # summary_downtime - - def check_change_time(self, output, entity, what): - # find last changing time updated, if no update, the entity is never changed - # Input parameter is a dictionary when:last_changing_time - # constraints: - # the dictionary `output` cannot be empty - sorted_keys = sorted(output.keys()) - if not output: - self.fails.add("%s cannot be empty" % what) - return 0, 0 - - start = sorted_keys[0] - prev_time = output[start] - change_count = 0 - for when in sorted_keys[1:]: - if prev_time != output[when][entity]: - prev_time = output[when][entity] - change_count += 1 - - if change_count > 0: - self.info.add("%s state changed %d times" % (what, change_count)) - - # Note: the first item is a placeholder - return 0, change_count +from arista import Arista class StateMachine(): @@ -458,6 +108,10 @@ def is_flooding(self): class ReloadTest(BaseTest): TIMEOUT = 0.5 + VLAN_BASE_MAC_PATTERN = '72060001{:04}' + LAG_BASE_MAC_PATTERN = 
'5c010203{:04}' + SOCKET_RECV_BUFFER_SIZE = 10 * 1024 * 1024 + def __init__(self): BaseTest.__init__(self) self.fails = {} @@ -466,36 +120,36 @@ def __init__(self): self.logs_info = {} self.log_lock = threading.RLock() self.test_params = testutils.test_params_get() - self.check_param('verbose', False, required = False) - self.check_param('dut_username', '', required = True) - self.check_param('dut_hostname', '', required = True) - self.check_param('reboot_limit_in_seconds', 30, required = False) - self.check_param('reboot_type', 'fast-reboot', required = False) - self.check_param('graceful_limit', 180, required = False) - self.check_param('portchannel_ports_file', '', required = True) - self.check_param('vlan_ports_file', '', required = True) - self.check_param('ports_file', '', required = True) - self.check_param('dut_mac', '', required = True) - self.check_param('dut_vlan_ip', '', required = True) - self.check_param('default_ip_range', '', required = True) - self.check_param('vlan_ip_range', '', required = True) - self.check_param('lo_prefix', '10.1.0.32/32', required = False) - self.check_param('lo_v6_prefix', 'fc00:1::/64', required = False) - self.check_param('arista_vms', [], required = True) - self.check_param('min_bgp_gr_timeout', 15, required = False) - self.check_param('warm_up_timeout_secs', 180, required = False) - self.check_param('dut_stabilize_secs', 20, required = False) + self.check_param('verbose', False, required=False) + self.check_param('dut_username', '', required=True) + self.check_param('dut_hostname', '', required=True) + self.check_param('reboot_limit_in_seconds', 30, required=False) + self.check_param('reboot_type', 'fast-reboot', required=False) + self.check_param('graceful_limit', 180, required=False) + self.check_param('portchannel_ports_file', '', required=True) + self.check_param('vlan_ports_file', '', required=True) + self.check_param('ports_file', '', required=True) + self.check_param('dut_mac', '', required=True) + 
self.check_param('dut_vlan_ip', '', required=True) + self.check_param('default_ip_range', '', required=True) + self.check_param('vlan_ip_range', '', required=True) + self.check_param('lo_prefix', '10.1.0.32/32', required=False) + self.check_param('lo_v6_prefix', 'fc00:1::/64', required=False) + self.check_param('arista_vms', [], required=True) + self.check_param('min_bgp_gr_timeout', 15, required=False) + self.check_param('warm_up_timeout_secs', 180, required=False) + self.check_param('dut_stabilize_secs', 20, required=False) self.log_file_name = '/tmp/%s.log' % self.test_params['reboot_type'] self.log_fp = open(self.log_file_name, 'w') # Default settings - self.ping_dut_pkts = 10 - self.arp_ping_pkts = 1 - self.nr_pc_pkts = 100 - self.nr_tests = 3 - self.reboot_delay = 10 - self.task_timeout = 300 # Wait up to 5 minutes for tasks to complete + self.ping_dut_pkts = 10 + self.arp_ping_pkts = 1 + self.nr_pc_pkts = 100 + self.nr_tests = 3 + self.reboot_delay = 10 + self.task_timeout = 300 # Wait up to 5 minutes for tasks to complete self.max_nr_vl_pkts = 500 # FIXME: should be 1000. # But ptf is not fast enough + swss is slow for FDB and ARP entries insertions self.timeout_thr = None @@ -519,6 +173,10 @@ def __init__(self): # True : when one direction probe fails, don't probe another. # False: when one direction probe fails, continue probe another. 
self.light_probe = False + # We have two data plane traffic generators which are mutualy exclusive + # one is the reachability_watcher thread + # second is the fast send_in_background + self.dataplane_io_lock = threading.Lock() return @@ -529,9 +187,9 @@ def read_json(self, name): return content def read_port_indices(self): - self.port_indices = self.read_json('ports_file') + port_indices = self.read_json('ports_file') - return + return port_indices def read_portchannel_ports(self): content = self.read_json('portchannel_ports_file') @@ -598,23 +256,60 @@ def cancel_timeout(self): self.timeout_thr.cancel() self.timeout_thr = None + def generate_vlan_servers(self): + vlan_host_map = defaultdict(dict) + vlan_ip_range = self.test_params['vlan_ip_range'] + + _, mask = vlan_ip_range.split('/') + n_hosts = min(2**(32 - int(mask)) - 3, self.max_nr_vl_pkts) + + for counter, i in enumerate(xrange(2, n_hosts + 2)): + mac = self.VLAN_BASE_MAC_PATTERN.format(counter) + port = self.vlan_ports[i % len(self.vlan_ports)] + addr = self.host_ip(vlan_ip_range, i) + + vlan_host_map[port][addr] = mac + + self.nr_vl_pkts = n_hosts + + return vlan_host_map + + def generate_arp_responder_conf(self, vlan_host_map): + arp_responder_conf = {} + for port in vlan_host_map: + arp_responder_conf['eth{}'.format(port)] = vlan_host_map[port] + + return arp_responder_conf + + def dump_arp_responder_config(self, dump): + # save data for arp_replay process + with open("/tmp/from_t1.json", "w") as fp: + json.dump(dump, fp) + def setUp(self): - self.read_port_indices() + self.port_indices = self.read_port_indices() self.portchannel_ports = self.read_portchannel_ports() - vlan_ip_range = self.test_params['vlan_ip_range'] self.vlan_ports = self.read_vlan_ports() + self.vlan_ip_range = self.test_params['vlan_ip_range'] + self.default_ip_range = self.test_params['default_ip_range'] + self.limit = datetime.timedelta(seconds=self.test_params['reboot_limit_in_seconds']) self.reboot_type = 
self.test_params['reboot_type'] if self.reboot_type not in ['fast-reboot', 'warm-reboot']: raise ValueError('Not supported reboot_type %s' % self.reboot_type) self.dut_ssh = self.test_params['dut_username'] + '@' + self.test_params['dut_hostname'] self.dut_mac = self.test_params['dut_mac'] - # - self.generate_from_t1() - self.generate_from_vlan() - self.generate_ping_dut_lo() - self.generate_arp_ping_packet() + + self.vlan_host_map = self.generate_vlan_servers() + arp_responder_conf = self.generate_arp_responder_conf(self.vlan_host_map) + self.dump_arp_responder_config(arp_responder_conf) + + self.random_vlan = random.choice(self.vlan_ports) + self.from_server_src_port = self.random_vlan + self.from_server_src_addr = random.choice(self.vlan_host_map[self.random_vlan].keys()) + self.from_server_dst_addr = self.random_ip(self.test_params['default_ip_range']) + self.from_server_dst_ports = self.portchannel_ports self.log("Test params:") self.log("DUT ssh: %s" % self.dut_ssh) @@ -630,6 +325,11 @@ def setUp(self): self.log("Reboot type is %s" % self.reboot_type) + self.generate_from_t1() + self.generate_from_vlan() + self.generate_ping_dut_lo() + self.generate_arp_ping_packet() + if self.reboot_type == 'warm-reboot': # Pre-generate list of packets to be sent in send_in_background method. 
generate_start = datetime.datetime.now() @@ -639,7 +339,7 @@ def setUp(self): self.dataplane = ptf.dataplane_instance for p in self.dataplane.ports.values(): port = p.get_packet_source() - port.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1000000) + port.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.SOCKET_RECV_BUFFER_SIZE) self.dataplane.flush() if config["log_dir"] != None: @@ -673,38 +373,34 @@ def get_mac(self, iff): SIOCGIFHWADDR = 0x8927 # Get hardware address return ':'.join(['%02x' % ord(char) for char in self.get_if(iff, SIOCGIFHWADDR)[18:24]]) + @staticmethod + def hex_to_mac(hex_mac): + return ':'.join(hex_mac[i:i+2] for i in range(0, len(hex_mac), 2)) + def generate_from_t1(self): self.from_t1 = [] - vlan_ip_range = self.test_params['vlan_ip_range'] + # for each server host create a packet destinating server IP + for counter, host_port in enumerate(self.vlan_host_map): + src_addr = self.random_ip(self.default_ip_range) + src_port = self.random_port(self.portchannel_ports) - _, mask = vlan_ip_range.split('/') - n_hosts = min(2**(32 - int(mask)) - 3, self.max_nr_vl_pkts) + for server_ip in self.vlan_host_map[host_port]: + dst_addr = server_ip - dump = defaultdict(dict) - counter = 0 - for i in xrange(2, n_hosts + 2): - from_t1_src_addr = self.random_ip(self.test_params['default_ip_range']) - from_t1_src_port = self.random_port(self.portchannel_ports) - from_t1_dst_addr = self.host_ip(vlan_ip_range, i) - from_t1_dst_port = self.vlan_ports[i % len(self.vlan_ports)] - from_t1_if_name = "eth%d" % from_t1_dst_port - from_t1_if_addr = "%s/%s" % (from_t1_dst_addr, vlan_ip_range.split('/')[1]) - vlan_mac_hex = '72060001%04x' % counter - lag_mac_hex = '5c010203%04x' % counter - mac_addr = ':'.join(lag_mac_hex[i:i+2] for i in range(0, len(lag_mac_hex), 2)) - packet = simple_tcp_packet( - eth_src=mac_addr, - eth_dst=self.dut_mac, - ip_src=from_t1_src_addr, - ip_dst=from_t1_dst_addr, - ip_ttl=255, - tcp_dport=5000 - ) - 
self.from_t1.append((from_t1_src_port, str(packet))) - dump[from_t1_if_name][from_t1_dst_addr] = vlan_mac_hex - counter += 1 + # generate source MAC address for traffic based on LAG_BASE_MAC_PATTERN + mac_addr = self.hex_to_mac(self.LAG_BASE_MAC_PATTERN.format(counter)) + + packet = simple_tcp_packet(eth_src=mac_addr, + eth_dst=self.dut_mac, + ip_src=src_addr, + ip_dst=dst_addr, + ip_ttl=255, + tcp_dport=5000) + + self.from_t1.append((src_port, str(packet))) + # expect any packet with dport 5000 exp_packet = simple_tcp_packet( ip_src="0.0.0.0", ip_dst="0.0.0.0", @@ -720,20 +416,6 @@ def generate_from_t1(self): self.from_t1_exp_packet.set_do_not_care_scapy(scapy.TCP, "chksum") self.from_t1_exp_packet.set_do_not_care_scapy(scapy.IP, "ttl") - # save data for arp_replay process - with open("/tmp/from_t1.json", "w") as fp: - json.dump(dump, fp) - - random_vlan_iface = random.choice(dump.keys()) - self.from_server_src_port = int(random_vlan_iface.replace('eth','')) - self.from_server_src_addr = random.choice(dump[random_vlan_iface].keys()) - self.from_server_dst_addr = self.random_ip(self.test_params['default_ip_range']) - self.from_server_dst_ports = self.portchannel_ports - - self.nr_vl_pkts = n_hosts - - return - def generate_from_vlan(self): packet = simple_tcp_packet( eth_dst=self.dut_mac, @@ -749,13 +431,11 @@ def generate_from_vlan(self): ) self.from_vlan_exp_packet = Mask(exp_packet) - self.from_vlan_exp_packet.set_do_not_care_scapy(scapy.Ether,"src") - self.from_vlan_exp_packet.set_do_not_care_scapy(scapy.Ether,"dst") + self.from_vlan_exp_packet.set_do_not_care_scapy(scapy.Ether, "src") + self.from_vlan_exp_packet.set_do_not_care_scapy(scapy.Ether, "dst") self.from_vlan_packet = str(packet) - return - def generate_ping_dut_lo(self): dut_lo_ipv4 = self.test_params['lo_prefix'].split('/')[0] packet = simple_icmp_packet(eth_dst=self.dut_mac, @@ -800,50 +480,29 @@ def generate_arp_ping_packet(self): self.arp_resp.set_do_not_care_scapy(scapy.ARP, 'hwsrc') 
self.arp_src_port = src_port - def generate_bidirectional(self, packets_to_send = None): + def generate_bidirectional(self): """ This method is used to pre-generate packets to be sent in background thread. Packets are composed into a list, and present a bidirectional flow as next: five packet from T1, one packet from vlan. - Each packet has sequential UDP Payload - to be identified later. + Each packet has sequential TCP Payload - to be identified later. """ - if packets_to_send: - self.packets_to_send = packets_to_send - self.send_interval = self.time_to_listen / self.packets_to_send - else: - packets_to_send = self.packets_to_send - vlan_ip_range = self.test_params['vlan_ip_range'] - _, mask = vlan_ip_range.split('/') - n_hosts = min(2**(32 - int(mask)) - 3, self.max_nr_vl_pkts) - counter = 0 - self.packets_list = list() - for i in xrange(packets_to_send): + + self.send_interval = self.time_to_listen / self.packets_to_send + self.packets_list = [] + from_t1_iter = itertools.cycle(self.from_t1) + + for i in xrange(self.packets_to_send): payload = '0' * 60 + str(i) if (i % 5) == 0 : # From vlan to T1. - packet = simple_udp_packet( - eth_dst = self.dut_mac, - ip_src = self.from_server_src_addr, - ip_dst = self.from_server_dst_addr, - udp_sport = 1234, - udp_dport = 5000, - udp_payload = payload) + packet = scapyall.Ether(self.from_vlan_packet) + packet.load = payload from_port = self.from_server_src_port else: # From T1 to vlan. 
- from_t1_src_addr = self.random_ip(self.test_params['default_ip_range']) - from_t1_src_port = self.random_port(self.portchannel_ports) - from_t1_dst_addr = self.host_ip(vlan_ip_range, (counter%(n_hosts-2))+2) - lag_mac_hex = '5c010203%04x' % counter - mac_addr = ':'.join(lag_mac_hex[i:i+2] for i in range(0, len(lag_mac_hex), 2)) - counter += 1 - packet = simple_udp_packet( - eth_src = mac_addr, - eth_dst = self.dut_mac, - ip_src = from_t1_src_addr, - ip_dst = from_t1_dst_addr, - ip_ttl = 255, - udp_dport = 5000, - udp_payload = payload) - from_port = from_t1_src_port + src_port, packet = next(from_t1_iter) + packet = scapyall.Ether(packet) + packet.load = payload + from_port = src_port self.packets_list.append((from_port, str(packet))) def runTest(self): @@ -944,11 +603,14 @@ def runTest(self): self.watching = False if self.reboot_type == 'warm-reboot': + self.send_and_sniff() + # Stop watching DUT self.watching = False self.log("Stopping reachability state watch thread.") self.watcher_is_stopped.wait(timeout = 10) # Wait for the Watcher stopped. - self.send_and_sniff() + + self.save_sniffed_packets() examine_start = datetime.datetime.now() self.log("Packet flow examine started %s after the reboot" % str(examine_start - self.reboot_start)) @@ -1122,6 +784,11 @@ def wait_until_cpu_port_up(self): break time.sleep(self.TIMEOUT) + def apply_filter_all_ports(self, filter_expression): + for p in self.dataplane.ports.values(): + port = p.get_packet_source() + scapyall.attach_filter(port.socket, filter_expression) + def send_in_background(self, packets_list = None, interval = None): """ This method sends predefined list of packets with predefined interval. 
@@ -1131,16 +798,24 @@ def send_in_background(self, packets_list = None, interval = None): if not packets_list: packets_list = self.packets_list self.sniffer_started.wait(timeout=10) - sender_start = datetime.datetime.now() - self.log("Sender started at %s" % str(sender_start)) - for entry in packets_list: - time.sleep(interval) - testutils.send_packet(self, *entry) - self.log("Sender has been running for %s" % str(datetime.datetime.now() - sender_start)) + with self.dataplane_io_lock: + # While running fast data plane sender thread there are two reasons for filter to be applied + # 1. filter out data plane traffic which is tcp to free up the load on PTF socket (sniffer thread is using a different one) + # 2. during warm neighbor restoration DUT will send a lot of ARP requests which we are not interested in + # This is essential to get stable results + self.apply_filter_all_ports('not (arp and ether src {}) and not tcp'.format(self.test_params['dut_mac'])) + sender_start = datetime.datetime.now() + self.log("Sender started at %s" % str(sender_start)) + for entry in packets_list: + time.sleep(interval) + testutils.send_packet(self, *entry) + self.log("Sender has been running for %s" % str(datetime.datetime.now() - sender_start)) + # Remove filter + self.apply_filter_all_ports('') def sniff_in_background(self, wait = None): """ - This function listens on all ports, in both directions, for the UDP src=1234 dst=5000 packets, until timeout. + This function listens on all ports, in both directions, for the TCP src=1234 dst=5000 packets, until timeout. Once found, all packets are dumped to local pcap file, and all packets are saved to self.packets as scapy type. The native scapy.snif() is used as a background thread, to allow delayed start for the send_in_background(). 
@@ -1149,8 +824,7 @@ def sniff_in_background(self, wait = None): wait = self.time_to_listen + 30 sniffer_start = datetime.datetime.now() self.log("Sniffer started at %s" % str(sniffer_start)) - filename = '/tmp/capture.pcap' - sniff_filter = "udp and udp dst port 5000 and udp src port 1234 and not icmp" + sniff_filter = "tcp and tcp dst port 5000 and tcp src port 1234 and not icmp" scapy_sniffer = threading.Thread(target=self.scapy_sniff, kwargs={'wait': wait, 'sniff_filter': sniff_filter}) scapy_sniffer.start() time.sleep(2) # Let the scapy sniff initialize completely. @@ -1158,6 +832,9 @@ def sniff_in_background(self, wait = None): scapy_sniffer.join() self.log("Sniffer has been running for %s" % str(datetime.datetime.now() - sniffer_start)) self.sniffer_started.clear() + + def save_sniffed_packets(self): + filename = '/tmp/capture.pcap' if self.packets: scapyall.wrpcap(filename, self.packets) self.log("Pcap file dumped to %s" % filename) @@ -1183,13 +860,13 @@ def send_and_sniff(self): self.sniff_thr.join() self.sender_thr.join() - def check_udp_payload(self, packet): + def check_tcp_payload(self, packet): """ This method is used by examine_flow() method. - It returns True if a packet is not corrupted and has a valid UDP sequential UDP Payload, as created by generate_bidirectional() method'. + It returns True if a packet is not corrupted and has a valid TCP sequential TCP Payload, as created by generate_bidirectional() method'. """ try: - int(str(packet[scapyall.UDP].payload)) in range(self.packets_to_send) + int(str(packet[scapyall.TCP].payload)) in range(self.packets_to_send) return True except Exception as err: return False @@ -1198,9 +875,9 @@ def no_flood(self, packet): """ This method filters packets which are unique (i.e. no floods). 
""" - if (not int(str(packet[scapyall.UDP].payload)) in self.unique_id) and (packet[scapyall.Ether].src == self.dut_mac): + if (not int(str(packet[scapyall.TCP].payload)) in self.unique_id) and (packet[scapyall.Ether].src == self.dut_mac): # This is a unique (no flooded) received packet. - self.unique_id.append(int(str(packet[scapyall.UDP].payload))) + self.unique_id.append(int(str(packet[scapyall.TCP].payload))) return True elif packet[scapyall.Ether].dst == self.dut_mac: # This is a sent packet. @@ -1211,7 +888,7 @@ def no_flood(self, packet): def examine_flow(self, filename = None): """ This method examines pcap file (if given), or self.packets scapy file. - The method compares UDP payloads of the packets one by one (assuming all payloads are consecutive integers), + The method compares TCP payloads of the packets one by one (assuming all payloads are consecutive integers), and the losses if found - are treated as disruptions in Dataplane forwarding. All disruptions are saved to self.lost_packets dictionary, in format: disrupt_start_id = (missing_packets_count, disrupt_time, disrupt_start_timestamp, disrupt_stop_timestamp) @@ -1227,15 +904,15 @@ def examine_flow(self, filename = None): # Filter out packets and remove floods: self.unique_id = list() # This list will contain all unique Payload ID, to filter out received floods. 
filtered_packets = [ pkt for pkt in all_packets if - scapyall.UDP in pkt and + scapyall.TCP in pkt and not scapyall.ICMP in pkt and - pkt[scapyall.UDP].sport == 1234 and - pkt[scapyall.UDP].dport == 5000 and - self.check_udp_payload(pkt) and + pkt[scapyall.TCP].sport == 1234 and + pkt[scapyall.TCP].dport == 5000 and + self.check_tcp_payload(pkt) and self.no_flood(pkt) ] # Re-arrange packets, if delayed, by Payload ID and Timestamp: - packets = sorted(filtered_packets, key = lambda packet: (int(str(packet[scapyall.UDP].payload)), packet.time )) + packets = sorted(filtered_packets, key = lambda packet: (int(str(packet[scapyall.TCP].payload)), packet.time )) self.lost_packets = dict() self.max_disrupt, self.total_disruption = 0, 0 sent_packets = dict() @@ -1250,13 +927,13 @@ def examine_flow(self, filename = None): for packet in packets: if packet[scapyall.Ether].dst == self.dut_mac: # This is a sent packet - keep track of it as payload_id:timestamp. - sent_payload = int(str(packet[scapyall.UDP].payload)) + sent_payload = int(str(packet[scapyall.TCP].payload)) sent_packets[sent_payload] = packet.time continue if packet[scapyall.Ether].src == self.dut_mac: # This is a received packet. received_time = packet.time - received_payload = int(str(packet[scapyall.UDP].payload)) + received_payload = int(str(packet[scapyall.TCP].payload)) received_counter += 1 if not (received_payload and received_time): # This is the first valid received packet. @@ -1483,16 +1160,18 @@ def reachability_watcher(self): # changes for future analysis self.watcher_is_stopped.clear() # Watcher is running. 
while self.watching: - vlan_to_t1, t1_to_vlan = self.ping_data_plane(self.light_probe) - reachable = (t1_to_vlan > self.nr_vl_pkts * 0.7 and - vlan_to_t1 > self.nr_pc_pkts * 0.7) - partial = (reachable and - (t1_to_vlan < self.nr_vl_pkts or - vlan_to_t1 < self.nr_pc_pkts)) - flooding = (reachable and - (t1_to_vlan > self.nr_vl_pkts or - vlan_to_t1 > self.nr_pc_pkts)) - self.log_asic_state_change(reachable, partial, t1_to_vlan, flooding) + if self.dataplane_io_lock.acquire(False): + vlan_to_t1, t1_to_vlan = self.ping_data_plane(self.light_probe) + reachable = (t1_to_vlan > self.nr_vl_pkts * 0.7 and + vlan_to_t1 > self.nr_pc_pkts * 0.7) + partial = (reachable and + (t1_to_vlan < self.nr_vl_pkts or + vlan_to_t1 < self.nr_pc_pkts)) + flooding = (reachable and + (t1_to_vlan > self.nr_vl_pkts or + vlan_to_t1 > self.nr_pc_pkts)) + self.log_asic_state_change(reachable, partial, t1_to_vlan, flooding) + self.dataplane_io_lock.release() total_rcv_pkt_cnt = self.pingDut() reachable = total_rcv_pkt_cnt > 0 and total_rcv_pkt_cnt > self.ping_dut_pkts * 0.7 partial = total_rcv_pkt_cnt > 0 and total_rcv_pkt_cnt < self.ping_dut_pkts diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py new file mode 100644 index 00000000000..e77b69a3874 --- /dev/null +++ b/ansible/roles/test/files/ptftests/arista.py @@ -0,0 +1,386 @@ +import ptf +from ptf.base_tests import BaseTest +from ptf import config +import ptf.testutils as testutils +from ptf.testutils import * +from ptf.dataplane import match_exp_pkt +import datetime +import time +import subprocess +from ptf.mask import Mask +import socket +import ptf.packet as scapy +import thread +import threading +from multiprocessing.pool import ThreadPool, TimeoutError +import os +import signal +import random +import struct +import socket +from pprint import pprint +from fcntl import ioctl +import sys +import json +import re +from collections import defaultdict +import json +import paramiko +import Queue 
+import pickle +from operator import itemgetter +import scapy.all as scapyall +import enum + +class Arista(object): + DEBUG = False + def __init__(self, ip, queue, test_params, login='admin', password='123456'): + self.ip = ip + self.queue = queue + self.login = login + self.password = password + self.conn = None + self.hostname = None + self.v4_routes = [test_params['vlan_ip_range'], test_params['lo_prefix']] + self.v6_routes = [test_params['lo_v6_prefix']] + self.fails = set() + self.info = set() + self.min_bgp_gr_timeout = int(test_params['min_bgp_gr_timeout']) + + def __del__(self): + self.disconnect() + + def connect(self): + self.conn = paramiko.SSHClient() + self.conn.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + self.conn.connect(self.ip, username=self.login, password=self.password, allow_agent=False, look_for_keys=False) + self.shell = self.conn.invoke_shell() + + first_prompt = self.do_cmd(None, prompt = '>') + self.hostname = self.extract_hostname(first_prompt) + + self.do_cmd('enable') + self.do_cmd('terminal length 0') + + return self.shell + + def extract_hostname(self, first_prompt): + lines = first_prompt.split('\n') + prompt = lines[-1] + return prompt.strip().replace('>', '#') + + def do_cmd(self, cmd, prompt = None): + if prompt == None: + prompt = self.hostname + + if cmd is not None: + self.shell.send(cmd + '\n') + + input_buffer = '' + while prompt not in input_buffer: + input_buffer += self.shell.recv(16384) + + return input_buffer + + def disconnect(self): + if self.conn is not None: + self.conn.close() + self.conn = None + + return + + def run(self): + data = {} + debug_data = {} + run_once = False + log_first_line = None + quit_enabled = False + v4_routing_ok = False + v6_routing_ok = False + routing_works = True + self.connect() + + cur_time = time.time() + sample = {} + samples = {} + portchannel_output = self.do_cmd("show interfaces po1 | json") + portchannel_output = "\n".join(portchannel_output.split("\r\n")[1:-1]) + 
sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] + samples[cur_time] = sample + + while not (quit_enabled and v4_routing_ok and v6_routing_ok): + cmd = self.queue.get() + if cmd == 'quit': + quit_enabled = True + continue + cur_time = time.time() + info = {} + debug_info = {} + lacp_output = self.do_cmd('show lacp neighbor') + info['lacp'] = self.parse_lacp(lacp_output) + bgp_neig_output = self.do_cmd('show ip bgp neighbors') + info['bgp_neig'] = self.parse_bgp_neighbor(bgp_neig_output) + + bgp_route_v4_output = self.do_cmd('show ip route bgp | json') + v4_routing_ok = self.parse_bgp_route(bgp_route_v4_output, self.v4_routes) + info['bgp_route_v4'] = v4_routing_ok + + bgp_route_v6_output = self.do_cmd("show ipv6 route bgp | json") + v6_routing_ok = self.parse_bgp_route(bgp_route_v6_output, self.v6_routes) + info["bgp_route_v6"] = v6_routing_ok + + portchannel_output = self.do_cmd("show interfaces po1 | json") + portchannel_output = "\n".join(portchannel_output.split("\r\n")[1:-1]) + sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] + + if not run_once: + self.ipv4_gr_enabled, self.ipv6_gr_enabled, self.gr_timeout = self.parse_bgp_neighbor_once(bgp_neig_output) + if self.gr_timeout is not None: + log_first_line = "session_begins_%f" % cur_time + self.do_cmd("send log message %s" % log_first_line) + run_once = True + + data[cur_time] = info + samples[cur_time] = sample + if self.DEBUG: + debug_data[cur_time] = { + 'show lacp neighbor' : lacp_output, + 'show ip bgp neighbors' : bgp_neig_output, + 'show ip route bgp' : bgp_route_v4_output, + 'show ipv6 route bgp' : bgp_route_v6_output, + } + + attempts = 60 + for _ in range(attempts): + log_output = self.do_cmd("show log | begin %s" % log_first_line) + log_lines = log_output.split("\r\n")[1:-1] + log_data = self.parse_logs(log_lines) + if len(log_data) != 0: 
+ break + time.sleep(1) # wait until logs are populated + + if len(log_data) == 0: + log_data['error'] = 'Incomplete output' + + self.disconnect() + + # save data for troubleshooting + with open("/tmp/%s.data.pickle" % self.ip, "w") as fp: + pickle.dump(data, fp) + + # save debug data for troubleshooting + if self.DEBUG: + with open("/tmp/%s.raw.pickle" % self.ip, "w") as fp: + pickle.dump(debug_data, fp) + with open("/tmp/%s.logging" % self.ip, "w") as fp: + fp.write("\n".join(log_lines)) + + self.check_gr_peer_status(data) + cli_data = {} + cli_data['lacp'] = self.check_series_status(data, "lacp", "LACP session") + cli_data['bgp_v4'] = self.check_series_status(data, "bgp_route_v4", "BGP v4 routes") + cli_data['bgp_v6'] = self.check_series_status(data, "bgp_route_v6", "BGP v6 routes") + cli_data['po'] = self.check_change_time(samples, "po_changetime", "PortChannel interface") + + route_timeout = log_data['route_timeout'] + cli_data['route_timeout'] = route_timeout + + # {'10.0.0.38': [(0, '4200065100)')], 'fc00::2d': [(0, '4200065100)')]} + for nei in route_timeout.keys(): + asn = route_timeout[nei][0][-1] + msg = 'BGP route GR timeout: neighbor %s (ASN %s' % (nei, asn) + self.fails.add(msg) + + return self.fails, self.info, cli_data, log_data + + def extract_from_logs(self, regexp, data): + raw_data = [] + result = defaultdict(list) + initial_time = -1 + re_compiled = re.compile(regexp) + for line in data: + m = re_compiled.match(line) + if not m: + continue + raw_data.append((datetime.datetime.strptime(m.group(1), "%b %d %X"), m.group(2), m.group(3))) + + if len(raw_data) > 0: + initial_time = raw_data[0][0] + for when, what, status in raw_data: + offset = (when - initial_time if when > initial_time else initial_time - when).seconds + result[what].append((offset, status)) + + return result, initial_time + + def parse_logs(self, data): + result = {} + bgp_r = r'^(\S+\s+\d+\s+\S+) \S+ Rib: %BGP-5-ADJCHANGE: peer (\S+) .+ (\S+)$' + result_bgp, initial_time_bgp = 
self.extract_from_logs(bgp_r, data) + if_r = r'^(\S+\s+\d+\s+\S+) \S+ Ebra: %LINEPROTO-5-UPDOWN: Line protocol on Interface (\S+), changed state to (\S+)$' + result_if, initial_time_if = self.extract_from_logs(if_r, data) + + route_r = r'^(\S+\s+\d+\s+\S+) \S+ Rib: %BGP-5-BGP_GRACEFUL_RESTART_TIMEOUT: Deleting stale routes from peer (\S+) .+ (\S+)$' + result_rt, initial_time_rt = self.extract_from_logs(route_r, data) + + result['route_timeout'] = result_rt + + if initial_time_bgp == -1 or initial_time_if == -1: + return result + + for events in result_bgp.values(): + if events[-1][1] != 'Established': + return result + + # first state is Idle, last state is Established + for events in result_bgp.values(): + if len(events) > 1: + assert(events[0][1] != 'Established') + + assert(events[-1][1] == 'Established') + + # first state is down, last state is up + for events in result_if.values(): + assert(events[0][1] == 'down') + assert(events[-1][1] == 'up') + + po_name = [ifname for ifname in result_if.keys() if 'Port-Channel' in ifname][0] + neigh_ipv4 = [neig_ip for neig_ip in result_bgp.keys() if '.' 
in neig_ip][0] + + result['PortChannel was down (seconds)'] = result_if[po_name][-1][0] - result_if[po_name][0][0] + for if_name in sorted(result_if.keys()): + result['Interface %s was down (times)' % if_name] = map(itemgetter(1), result_if[if_name]).count("down") + + for neig_ip in result_bgp.keys(): + key = "BGP IPv6 was down (seconds)" if ':' in neig_ip else "BGP IPv4 was down (seconds)" + result[key] = result_bgp[neig_ip][-1][0] - result_bgp[neig_ip][0][0] + + for neig_ip in result_bgp.keys(): + key = "BGP IPv6 was down (times)" if ':' in neig_ip else "BGP IPv4 was down (times)" + result[key] = map(itemgetter(1), result_bgp[neig_ip]).count("Idle") + + bgp_po_offset = (initial_time_if - initial_time_bgp if initial_time_if > initial_time_bgp else initial_time_bgp - initial_time_if).seconds + result['PortChannel went down after bgp session was down (seconds)'] = bgp_po_offset + result_if[po_name][0][0] + + for neig_ip in result_bgp.keys(): + key = "BGP IPv6 was gotten up after Po was up (seconds)" if ':' in neig_ip else "BGP IPv4 was gotten up after Po was up (seconds)" + result[key] = result_bgp[neig_ip][-1][0] - bgp_po_offset - result_if[po_name][-1][0] + + return result + + def parse_lacp(self, output): + return output.find('Bundled') != -1 + + def parse_bgp_neighbor_once(self, output): + is_gr_ipv4_enabled = False + is_gr_ipv6_enabled = False + restart_time = None + for line in output.split('\n'): + if ' Restart-time is' in line: + restart_time = int(line.replace(' Restart-time is ', '')) + continue + + if 'is enabled, Forwarding State is' in line: + if 'IPv6' in line: + is_gr_ipv6_enabled = True + elif 'IPv4' in line: + is_gr_ipv4_enabled = True + + return is_gr_ipv4_enabled, is_gr_ipv6_enabled, restart_time + + def parse_bgp_neighbor(self, output): + gr_active = None + gr_timer = None + for line in output.split('\n'): + if 'Restart timer is' in line: + gr_active = 'is active' in line + gr_timer = str(line[-9:-1]) + + return gr_active, gr_timer + + def 
parse_bgp_route(self, output, expects): + prefixes = set() + data = "\n".join(output.split("\r\n")[1:-1]) + obj = json.loads(data) + + if "vrfs" in obj and "default" in obj["vrfs"]: + obj = obj["vrfs"]["default"] + for prefix, attrs in obj["routes"].items(): + if "routeAction" not in attrs or attrs["routeAction"] != "forward": + continue + if all("Port-Channel" in via["interface"] for via in attrs["vias"]): + prefixes.add(prefix) + + return set(expects) == prefixes + + def check_gr_peer_status(self, output): + # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 + if not self.ipv4_gr_enabled: + self.fails.add("bgp ipv4 graceful restart is not enabled") + if not self.ipv6_gr_enabled: + pass # ToDo: + if self.gr_timeout < 120: # bgp graceful restart timeout less then 120 seconds + self.fails.add("bgp graceful restart timeout is less then 120 seconds") + + for when, other in sorted(output.items(), key = lambda x : x[0]): + gr_active, timer = other['bgp_neig'] + # wnen it's False, it's ok, wnen it's True, check that inactivity timer not less then self.min_bgp_gr_timeout seconds + if gr_active and datetime.datetime.strptime(timer, '%H:%M:%S') < datetime.datetime(1900, 1, 1, second = self.min_bgp_gr_timeout): + self.fails.add("graceful restart timer is almost finished. 
Less then %d seconds left" % self.min_bgp_gr_timeout) + + def check_series_status(self, output, entity, what): + # find how long anything was down + # Input parameter is a dictionary when:status + # constraints: + # entity must be down just once + # entity must be up when the test starts + # entity must be up when the test stops + + sorted_keys = sorted(output.keys()) + if not output[sorted_keys[0]][entity]: + self.fails.add("%s must be up when the test starts" % what) + return 0, 0 + if not output[sorted_keys[-1]][entity]: + self.fails.add("%s must be up when the test stops" % what) + return 0, 0 + + start = sorted_keys[0] + cur_state = True + res = defaultdict(list) + for when in sorted_keys[1:]: + if cur_state != output[when][entity]: + res[cur_state].append(when - start) + start = when + cur_state = output[when][entity] + res[cur_state].append(when - start) + + is_down_count = len(res[False]) + + if is_down_count > 1: + self.info.add("%s must be down just for once" % what) + + return is_down_count, sum(res[False]) # summary_downtime + + def check_change_time(self, output, entity, what): + # find last changing time updated, if no update, the entity is never changed + # Input parameter is a dictionary when:last_changing_time + # constraints: + # the dictionary `output` cannot be empty + sorted_keys = sorted(output.keys()) + if not output: + self.fails.add("%s cannot be empty" % what) + return 0, 0 + + start = sorted_keys[0] + prev_time = output[start] + change_count = 0 + for when in sorted_keys[1:]: + if prev_time != output[when][entity]: + prev_time = output[when][entity] + change_count += 1 + + if change_count > 0: + self.info.add("%s state changed %d times" % (what, change_count)) + + # Note: the first item is a placeholder + return 0, change_count + From eea8cb188af1349aefcb51a24dc36397344f8bff Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 2 May 2019 08:07:14 -0700 Subject: [PATCH 017/218] [warm-reboot] increase warm-reboot sniffing time (#899) Some 
test setup has delay in the IO path, so the original sniff wait time doesn't guarantee that all 36000 packets were received. Increasing sniffing wait time by 30 seconds. Signed-off-by: Ying Xie --- ansible/roles/test/files/ptftests/advanced-reboot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index b8be0e89642..c8ad027e3f3 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -821,7 +821,7 @@ def sniff_in_background(self, wait = None): The native scapy.snif() is used as a background thread, to allow delayed start for the send_in_background(). """ if not wait: - wait = self.time_to_listen + 30 + wait = self.time_to_listen + 60 sniffer_start = datetime.datetime.now() self.log("Sniffer started at %s" % str(sniffer_start)) sniff_filter = "tcp and tcp dst port 5000 and tcp src port 1234 and not icmp" From d9f88d33ea952d8edfcd012ce9aa4f994b7f8904 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Wed, 17 Apr 2019 11:29:12 -0700 Subject: [PATCH 018/218] [lag] Increase wait time for LAG to change state to 35 seconds (#871) --- ansible/roles/test/tasks/lag_minlink.yml | 2 +- ansible/roles/test/tasks/single_lag_test.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/test/tasks/lag_minlink.yml b/ansible/roles/test/tasks/lag_minlink.yml index f2c9028a0bd..7b5af1f72c0 100644 --- a/ansible/roles/test/tasks/lag_minlink.yml +++ b/ansible/roles/test/tasks/lag_minlink.yml @@ -45,7 +45,7 @@ connection: switch - pause: - seconds: 20 + seconds: 35 - lag_facts: host={{ inventory_hostname }} diff --git a/ansible/roles/test/tasks/single_lag_test.yml b/ansible/roles/test/tasks/single_lag_test.yml index de8e558abee..852eb5734cb 100644 --- a/ansible/roles/test/tasks/single_lag_test.yml +++ b/ansible/roles/test/tasks/single_lag_test.yml @@ -31,9 +31,9 @@ - name:
test fanout interface (physical) flap and lacp keep correct po status follow minimum links requirement include: lag_minlink.yml vars: - wait_down_time: 20 + wait_down_time: 35 -### Now figure out remote VM and interface info for the falpping lag member and run minlink test +### Now figure out remote VM and interface info for the flapping lag member and run minlink test - set_fact: peer_device: "{{vm_neighbors[flap_intf]['name']}}" neighbor_interface: "{{vm_neighbors[flap_intf]['port']}}" From f529eac939a2b9b3e322fe6453f8f34a2d2345d9 Mon Sep 17 00:00:00 2001 From: chitra-raghavan <32665166+chitra-raghavan@users.noreply.github.com> Date: Thu, 9 May 2019 07:03:48 +0530 Subject: [PATCH 019/218] [201811]Modified sensors data for S6100/Z9100 according to latest output (#907) * Modified sensors data for S6100/Z9100 according to latest output --- ansible/group_vars/sonic/sku-sensors-data.yml | 193 +++++++++++++++--- 1 file changed, 160 insertions(+), 33 deletions(-) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index f888b088bb1..f4cf27bb78d 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -49,14 +49,14 @@ sensors_checks: compares: fan: [] power: - - - SMF_S6100_ON-isa-0000/Psu1 Input/power1_input - - SMF_S6100_ON-isa-0000/Psu1 Input/power1_max - - - SMF_S6100_ON-isa-0000/Psu1 Output/power2_input - - SMF_S6100_ON-isa-0000/Psu1 Output/power2_max - - - SMF_S6100_ON-isa-0000/Psu2 Input/power3_input - - SMF_S6100_ON-isa-0000/Psu2 Input/power3_max - - - SMF_S6100_ON-isa-0000/Psu2 Output/power4_input - - SMF_S6100_ON-isa-0000/Psu2 Output/power4_max + - - SMF_S6100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_S6100_ON-isa-0000/PSU1 Input Power/power1_max + - - SMF_S6100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_S6100_ON-isa-0000/PSU1 Output Power/power2_max + - - SMF_S6100_ON-isa-0000/PSU2 Input Power/power3_input + - 
SMF_S6100_ON-isa-0000/PSU2 Input Power/power3_max + - - SMF_S6100_ON-isa-0000/PSU2 Output Power/power4_input + - SMF_S6100_ON-isa-0000/PSU2 Output Power/power4_max temp: - - coretemp-isa-0000/Core 0/temp2_input - coretemp-isa-0000/Core 0/temp2_crit @@ -78,10 +78,10 @@ sensors_checks: - SMF_S6100_ON-isa-0000/Front GE/temp10_crit - - SMF_S6100_ON-isa-0000/Front SFP+/temp11_input - SMF_S6100_ON-isa-0000/Front SFP+/temp11_crit - - - SMF_S6100_ON-isa-0000/PSU 1/temp14_input - - SMF_S6100_ON-isa-0000/PSU 1/temp14_crit - - - SMF_S6100_ON-isa-0000/PSU 2/temp15_input - - SMF_S6100_ON-isa-0000/PSU 2/temp15_crit + - - SMF_S6100_ON-isa-0000/PSU1 Temp/temp14_input + - SMF_S6100_ON-isa-0000/PSU1 Temp/temp14_max + - - SMF_S6100_ON-isa-0000/PSU2 Temp/temp15_input + - SMF_S6100_ON-isa-0000/PSU2 Temp/temp15_max non_zero: fan: - SMF_S6100_ON-isa-0000/Tray1 Fan1/fan1_input @@ -121,10 +121,10 @@ sensors_checks: - SMF_S6100_ON-isa-0000/PSU1 VOUT/in30_input - SMF_S6100_ON-isa-0000/PSU2 VIN/in31_input - SMF_S6100_ON-isa-0000/PSU2 VOUT/in32_input - - SMF_S6100_ON-isa-0000/Psu1 Input/power1_input - - SMF_S6100_ON-isa-0000/Psu1 Output/power2_input - - SMF_S6100_ON-isa-0000/Psu2 Input/power3_input - - SMF_S6100_ON-isa-0000/Psu2 Output/power4_input + - SMF_S6100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_S6100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_S6100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_S6100_ON-isa-0000/PSU2 Output Power/power4_input - SMF_S6100_ON-isa-0000/XP1R0V/curr21_input - SMF_S6100_ON-isa-0000/XP1R0V_ROV/curr22_input temp: @@ -141,12 +141,12 @@ sensors_checks: - SMF_S6100_ON-isa-0000/Front SFP+/temp11_input - SMF_S6100_ON-isa-0000/BCM Internal/temp12_input - SMF_S6100_ON-isa-0000/CPU Internal/temp13_input - - SMF_S6100_ON-isa-0000/PSU 1/temp14_input - - SMF_S6100_ON-isa-0000/PSU 2/temp15_input + - SMF_S6100_ON-isa-0000/PSU1 Temp/temp14_input + - SMF_S6100_ON-isa-0000/PSU2 Temp/temp15_input psu_skips: {} - Force10-Z9100: + Force10-Z9100-C32: alarms: 
fan: - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_alarm @@ -217,14 +217,14 @@ sensors_checks: - - coretemp-isa-0000/Core 3/temp5_input - coretemp-isa-0000/Core 3/temp5_crit power: - - - SMF_Z9100_ON-isa-0000/Psu1 Input/power1_input - - SMF_Z9100_ON-isa-0000/Psu1 Input/power1_max - - - SMF_Z9100_ON-isa-0000/Psu1 Output/power2_input - - SMF_Z9100_ON-isa-0000/Psu1 Output/power2_max - - - SMF_Z9100_ON-isa-0000/Psu2 Input/power3_input - - SMF_Z9100_ON-isa-0000/Psu2 Input/power3_max - - - SMF_Z9100_ON-isa-0000/Psu2 Output/power4_input - - SMF_Z9100_ON-isa-0000/Psu2 Output/power4_max + - - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_max + - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_max + - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_max + - - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input + - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_max fan: [] non_zero: fan: @@ -241,10 +241,10 @@ sensors_checks: - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_input - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_input power: - - SMF_Z9100_ON-isa-0000/Psu1 Input/power1_input - - SMF_Z9100_ON-isa-0000/Psu1 Output/power2_input - - SMF_Z9100_ON-isa-0000/Psu2 Input/power3_input - - SMF_Z9100_ON-isa-0000/Psu2 Output/power4_input + - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input - SMF_Z9100_ON-isa-0000/PSU1 VIN/in29_input - SMF_Z9100_ON-isa-0000/PSU1 VOUT/in30_input - SMF_Z9100_ON-isa-0000/PSU2 VIN/in31_input @@ -262,8 +262,135 @@ sensors_checks: - SMF_Z9100_ON-isa-0000/Front BCM On-Board (U2)/temp4_input - "SMF_Z9100_ON-isa-0000/BCM Switch On-Board #1 (U38)/temp6_input" - SMF_Z9100_ON-isa-0000/Rear (U2900)/temp9_input - - SMF_Z9100_ON-isa-0000/PSU 
1/temp14_input - - SMF_Z9100_ON-isa-0000/PSU 2/temp15_input + - SMF_Z9100_ON-isa-0000/PSU1 Temp/temp14_input + - SMF_Z9100_ON-isa-0000/PSU2 Temp/temp15_input + + psu_skips: {} + + Force10-Z9100-C8D48: + alarms: + fan: + - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_alarm + - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_fault + - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_alarm + - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_fault + - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_alarm + - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_fault + - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_alarm + - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_fault + - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_alarm + - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_fault + - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_alarm + - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_fault + - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_alarm + - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_fault + - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_alarm + - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_fault + - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_alarm + - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_fault + - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_alarm + - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_fault + - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_alarm + - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_fault + - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_alarm + - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_fault + temp: + - coretemp-isa-0000/Core 0/temp2_crit_alarm + - coretemp-isa-0000/Core 1/temp3_crit_alarm + - coretemp-isa-0000/Core 2/temp4_crit_alarm + - coretemp-isa-0000/Core 3/temp5_crit_alarm + power: + - SMF_Z9100_ON-isa-0000/CPU XP3R3V_EARLY/in1_alarm + - SMF_Z9100_ON-isa-0000/CPU XP5R0V_CP/in2_alarm + - SMF_Z9100_ON-isa-0000/CPU XP3R3V_STD/in3_alarm + - SMF_Z9100_ON-isa-0000/CPU XP3R3V_CP /in4_alarm + - SMF_Z9100_ON-isa-0000/CPU XP3R3V_STD/in3_alarm + - SMF_Z9100_ON-isa-0000/CPU XP3R3V_CP /in4_alarm + - SMF_Z9100_ON-isa-0000/CPU XP0R75V_VTT_A/in5_alarm + - SMF_Z9100_ON-isa-0000/CPU XP0R75V_VTT_B/in6_alarm + - 
SMF_Z9100_ON-isa-0000/CPU XP1R07V_CPU/in7_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU/in8_alarm + - SMF_Z9100_ON-isa-0000/CPU XP12R0V/in9_alarm + - SMF_Z9100_ON-isa-0000/CPU VDDR_CPU_2/in10_alarm + - SMF_Z9100_ON-isa-0000/CPU VDDR_CPU_1/in11_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R5V_CLK/in12_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R35V_CPU/in13_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R8V_CPU/in14_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU_VNN/in15_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU_VCC/in16_alarm + - SMF_Z9100_ON-isa-0000/CPU XP1R5V_EARLY/in17_alarm + - SMF_Z9100_ON-isa-0000/SW XP3R3V_MON/in19_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R8V_MON/in20_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R25V_MON/in21_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R2V_MON/in22_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R0V_SW_MON/in23_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R0V_ROV_SW_MON/in24_alarm + - SMF_Z9100_ON-isa-0000/SW XP5V_MB_MON/in25_alarm + - SMF_Z9100_ON-isa-0000/SW XP1R8V_FPGA_MON/in26_alarm + - SMF_Z9100_ON-isa-0000/SW XP3R3V_FPGA_MON/in27_alarm + - SMF_Z9100_ON-isa-0000/SW XP3R3V_EARLY_MON/in28_alarm + + compares: + temp: + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_crit + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_crit + - - coretemp-isa-0000/Core 2/temp4_input + - coretemp-isa-0000/Core 2/temp4_crit + - - coretemp-isa-0000/Core 3/temp5_input + - coretemp-isa-0000/Core 3/temp5_crit + power: + - - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_max + - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_max + - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_max + - - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input + - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_max + fan: [] + non_zero: + fan: + - SMF_Z9100_ON-isa-0000/Tray1 
Fan1/fan1_input + - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_input + - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_input + - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_input + - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_input + - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_input + - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_input + - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_input + - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_input + - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_input + - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_input + - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_input + power: + - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input + - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input + - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input + - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input + - SMF_Z9100_ON-isa-0000/PSU1 VIN/in29_input + - SMF_Z9100_ON-isa-0000/PSU1 VOUT/in30_input + - SMF_Z9100_ON-isa-0000/PSU2 VIN/in31_input + - SMF_Z9100_ON-isa-0000/PSU2 VOUT/in32_input + - SMF_Z9100_ON-isa-0000/XP1R0V/curr21_input + - SMF_Z9100_ON-isa-0000/XP1R0V_ROV/curr22_input + + temp: + - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 2/temp4_input + - coretemp-isa-0000/Core 3/temp5_input + - SMF_Z9100_ON-isa-0000/CPU On-board (U2900)/temp1_input + - "SMF_Z9100_ON-isa-0000/BCM Switch On-Board #1 (U44)/temp2_input" + - SMF_Z9100_ON-isa-0000/Front BCM On-Board (U4)/temp3_input + - SMF_Z9100_ON-isa-0000/Front BCM On-Board (U2)/temp4_input + - "SMF_Z9100_ON-isa-0000/BCM Switch On-Board #1 (U38)/temp6_input" + - SMF_Z9100_ON-isa-0000/Rear (U2900)/temp9_input + - SMF_Z9100_ON-isa-0000/PSU1 Temp/temp14_input + - SMF_Z9100_ON-isa-0000/PSU2 Temp/temp15_input psu_skips: {} From 84964a7dc2fb7a8519428c0b8eaac8ff2cee2761 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Fri, 10 May 2019 
00:17:54 +0300 Subject: [PATCH 020/218] [advanced-reboot] Improve error reporting in ansible log (#903) * [advanced-reboot] start watcher thread after initializing Event objects Signed-off-by: Stepan Blyschak * [advanced-reboot] improve error messages when DUT is not ready for test Signed-off-by: Stepan Blyschak --- .../test/files/ptftests/advanced-reboot.py | 117 +++++++++--------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index c8ad027e3f3..2627829565a 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -160,6 +160,9 @@ def __init__(self): self.send_interval = 0.0035 self.packets_to_send = min(int(self.time_to_listen / (self.send_interval + 0.0015)), 45000) # How many packets to be sent in send_in_background method + # Thread pool for background watching operations + self.pool = ThreadPool(processes=3) + # State watcher attributes self.watching = False self.cpu_state = StateMachine('init') @@ -239,22 +242,15 @@ def log(self, message, verbose=False): print "%s : %s" % (current_time, message) self.log_fp.write("%s : %s\n" % (current_time, message)) - def timeout(self, seconds, message): - def timeout_exception(self, message): - self.log('Timeout is reached: %s' % message) - self.tearDown() - os.kill(os.getpid(), signal.SIGINT) - - if self.timeout_thr is None: - self.timeout_thr = threading.Timer(seconds, timeout_exception, args=(self, message)) - self.timeout_thr.start() - else: - raise Exception("Timeout already set") - - def cancel_timeout(self): - if self.timeout_thr is not None: - self.timeout_thr.cancel() - self.timeout_thr = None + def timeout(self, func, seconds, message): + async_res = self.pool.apply_async(func) + try: + res = async_res.get(timeout=seconds) + except Exception as err: + # TimeoutError and Exception's from func + # captured here + raise 
type(err)(message) + return res def generate_vlan_servers(self): vlan_host_map = defaultdict(dict) @@ -535,30 +531,27 @@ def runTest(self): try: self.fails['dut'] = set() - pool = ThreadPool(processes=3) self.log("Starting reachability state watch thread...") self.watching = True self.light_probe = False - watcher = pool.apply_async(self.reachability_watcher) self.watcher_is_stopped = threading.Event() # Waiter Event for the Watcher state is stopped. self.watcher_is_running = threading.Event() # Waiter Event for the Watcher state is running. self.watcher_is_stopped.set() # By default the Watcher is not running. self.watcher_is_running.clear() # By default its required to wait for the Watcher started. # Give watch thread some time to wind up + watcher = self.pool.apply_async(self.reachability_watcher) time.sleep(5) self.log("Check that device is alive and pinging") - self.fails['dut'].add('DUT is not ready for test') - self.assertTrue(self.wait_dut_to_warm_up(), 'DUT is not stable') + self.fails['dut'].add("DUT is not ready for test") + self.wait_dut_to_warm_up() self.fails['dut'].clear() self.log("Schedule to reboot the remote switch in %s sec" % self.reboot_delay) thr.start() self.log("Wait until Control plane is down") - self.timeout(self.task_timeout, "DUT hasn't shutdown in %d seconds" % self.task_timeout) - self.wait_until_cpu_port_down() - self.cancel_timeout() + self.timeout(self.wait_until_cpu_port_down, self.task_timeout, "DUT hasn't shutdown in {} seconds".format(self.task_timeout)) if self.reboot_type == 'fast-reboot': self.light_probe = True @@ -568,15 +561,15 @@ def runTest(self): if self.reboot_type == 'fast-reboot': self.log("Check that device is still forwarding data plane traffic") - self.fails['dut'].add('Data plane has a forwarding problem') - self.assertTrue(self.check_alive(), 'DUT is not stable') + self.fails['dut'].add("Data plane has a forwarding problem after CPU went down") + self.check_alive() self.fails['dut'].clear() self.log("Wait 
until control plane up") - async_cpu_up = pool.apply_async(self.wait_until_cpu_port_up) + async_cpu_up = self.pool.apply_async(self.wait_until_cpu_port_up) self.log("Wait until data plane stops") - async_forward_stop = pool.apply_async(self.check_forwarding_stop) + async_forward_stop = self.pool.apply_async(self.check_forwarding_stop) try: async_cpu_up.get(timeout=self.task_timeout) @@ -593,9 +586,9 @@ def runTest(self): no_routing_start = datetime.datetime.min if no_routing_start is not None: - self.timeout(self.task_timeout, "DUT hasn't started to work for %d seconds" % self.task_timeout) - no_routing_stop, _ = self.check_forwarding_resume() - self.cancel_timeout() + no_routing_stop, _ = self.timeout(self.check_forwarding_resume, + self.task_timeout, + "DUT hasn't started to work for %d seconds" % self.task_timeout) else: no_routing_stop = datetime.datetime.min @@ -631,15 +624,16 @@ def runTest(self): for _, q in self.ssh_jobs: q.put('quit') - self.timeout(self.task_timeout, "SSH threads haven't finished for %d seconds" % self.task_timeout) - while any(thr.is_alive() for thr, _ in self.ssh_jobs): - for _, q in self.ssh_jobs: - q.put('go') - time.sleep(self.TIMEOUT) + def wait_for_ssh_threads(): + while any(thr.is_alive() for thr, _ in self.ssh_jobs): + for _, q in self.ssh_jobs: + q.put('go') + time.sleep(self.TIMEOUT) - for thr, _ in self.ssh_jobs: - thr.join() - self.cancel_timeout() + for thr, _ in self.ssh_jobs: + thr.join() + + self.timeout(wait_for_ssh_threads, self.task_timeout, "SSH threads haven't finished for %d seconds" % self.task_timeout) self.log("Data plane works again. 
Start time: %s" % str(no_routing_stop)) self.log("") @@ -654,7 +648,8 @@ def runTest(self): self.fails['dut'].add("%s cycle must be less than graceful limit %s seconds" % (self.reboot_type, self.test_params['graceful_limit'])) if self.reboot_type == 'fast-reboot' and no_cp_replies < 0.95 * self.nr_vl_pkts: self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) - + except Exception as e: + self.fails['dut'].add(e) finally: # Stop watching DUT self.watching = False @@ -1010,6 +1005,8 @@ def wait_dut_to_warm_up(self): # up towards PTF docker. In practice, I've seen this warm up taking # up to ~70 seconds. + fail = None + dut_stabilize_secs = int(self.test_params['dut_stabilize_secs']) warm_up_timeout_secs = int(self.test_params['warm_up_timeout_secs']) @@ -1023,8 +1020,7 @@ def wait_dut_to_warm_up(self): if dataplane == 'up' and ctrlplane == 'up' and elapsed > dut_stabilize_secs: break; if elapsed > warm_up_timeout_secs: - # Control plane didn't come up within warm up timeout - return False + raise Exception("Control plane didn't come up within warm up timeout") time.sleep(1) # check until flooding is over. Flooding happens when FDB entry of @@ -1036,26 +1032,28 @@ def wait_dut_to_warm_up(self): if not self.asic_state.is_flooding() and elapsed > dut_stabilize_secs: break if elapsed > warm_up_timeout_secs: - # Control plane didn't stop flooding within warm up timeout - return False + raise Exception("Data plane didn't stop flooding within warm up timeout") time.sleep(1) dataplane = self.asic_state.get() ctrlplane = self.cpu_state.get() - if not dataplane == 'up' or not ctrlplane == 'up': - # Either control or data plane went down while we were waiting - # for the flooding to stop. 
- return False + if not dataplane == 'up': + fail = "Data plane" + elif not ctrlplane == 'up': + fail = "Control plane" - if (self.asic_state.get_state_time('up') > uptime or - self.cpu_state.get_state_time('up') > uptime): - # Either control plane or data plane flapped while we were - # waiting for the warm up. - return False + if fail is not None: + raise Exception("{} went down while waiting for flooding to stop".format(fail)) - # Everything is good - return True + if self.asic_state.get_state_time('up') > uptime: + fail = "Data plane" + elif self.cpu_state.get_state_time('up') > uptime: + fail = "Control plane" + + if fail is not None: + raise Exception("{} flapped while waiting for the warm up".format(fail)) + # Everything is good def check_alive(self): # This function checks that DUT routes the packets in the both directions. @@ -1077,16 +1075,17 @@ def check_alive(self): uptime = self.asic_state.get_state_time(state) else: if uptime: - return False # Stopped working after it working for sometime? 
+ raise Exception("Data plane stopped working") time.sleep(2) # wait, until FDB entries are populated for _ in range(self.nr_tests * 10): # wait for some time - if not self.asic_state.is_flooding(): - return True - time.sleep(2) + if self.asic_state.is_flooding(): + time.sleep(2) + else: + break - return False # we still see extra replies + raise Exception("DUT is flooding") def get_asic_vlan_reachability(self): From d53846e756c43fc31fafdb79047f918964fd289b Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Fri, 10 May 2019 00:18:10 +0300 Subject: [PATCH 021/218] [continuous-reboot] wait for warmboo-finalizer to finish if warm-reboot (#904) Signed-off-by: Stepan Blyschak --- ansible/roles/test/tasks/common_tasks/reboot_sonic.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml index 6440da71ad6..5c844a68723 100644 --- a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml +++ b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml @@ -59,3 +59,13 @@ - name: wait for 2 minute for prcesses and interfaces to be stable pause: seconds=120 + +- name: Wait for warmboot-finalizer service to finish + become: true + ignore_errors: true + shell: systemctl is-active warmboot-finalizer.service + register: status + until: status.stdout.find('inactive') != -1 + delay: 10 + retries: 30 + when: reboot_type == 'warm-reboot' From c36511a181ddb608b7e74491e34059190782660f Mon Sep 17 00:00:00 2001 From: Qi Luo Date: Sat, 11 May 2019 16:09:00 -0700 Subject: [PATCH 022/218] Workaround python2 bug on strptime with threading (#911) ``` Exception in thread Thread-2: Traceback (most recent call last): File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner self.run() File "/usr/lib/python2.7/threading.py", line 763, in run self.__target(*self.__args, **self.__kwargs) File 
"ptftests/advanced-reboot.py", line 769, in peer_state_check self.fails[ip], self.info[ip], self.cli_info[ip], self.logs_info[ip] = ssh.run() File "ptftests/arista.py", line 157, in run log_data = self.parse_logs(log_lines) File "ptftests/arista.py", line 218, in parse_logs result_bgp, initial_time_bgp = self.extract_from_logs(bgp_r, data) File "ptftests/arista.py", line 205, in extract_from_logs raw_data.append((datetime.datetime.strptime(m.group(1), "%b %d %X"), m.group(2), m.group(3))) AttributeError: 'module' object has no attribute '_strptime' ``` --- ansible/roles/test/files/ptftests/arista.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index e77b69a3874..d26de63068b 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -5,6 +5,7 @@ from ptf.testutils import * from ptf.dataplane import match_exp_pkt import datetime +import _strptime # workaround python bug ref: https://stackoverflow.com/a/22476843/2514803 import time import subprocess from ptf.mask import Mask From 0d7b5171ea084582770fcadc0d2dc47068417146 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Wed, 15 May 2019 02:18:37 +0300 Subject: [PATCH 023/218] [advanced-reboot] fix fast-reboot failure (DUT is flooding always) (#912) Signed-off-by: Stepan Blyschak --- ansible/roles/test/files/ptftests/advanced-reboot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 2627829565a..225aa0bba4f 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -1084,8 +1084,8 @@ def check_alive(self): time.sleep(2) else: break - - raise Exception("DUT is flooding") + else: + raise Exception("DUT is flooding") def 
get_asic_vlan_reachability(self): From 6892fd1dee943d04fe2bd8ddd6a5ceae7cbc4602 Mon Sep 17 00:00:00 2001 From: chitra-raghavan <32665166+chitra-raghavan@users.noreply.github.com> Date: Mon, 20 May 2019 21:58:00 +0530 Subject: [PATCH 024/218] updated S6100 sensors data (#922) --- ansible/group_vars/sonic/sku-sensors-data.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index f4cf27bb78d..5f32c2d7b47 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -139,8 +139,6 @@ sensors_checks: - SMF_S6100_ON-isa-0000/U2 Switch board?/temp9_input - SMF_S6100_ON-isa-0000/Front GE/temp10_input - SMF_S6100_ON-isa-0000/Front SFP+/temp11_input - - SMF_S6100_ON-isa-0000/BCM Internal/temp12_input - - SMF_S6100_ON-isa-0000/CPU Internal/temp13_input - SMF_S6100_ON-isa-0000/PSU1 Temp/temp14_input - SMF_S6100_ON-isa-0000/PSU2 Temp/temp15_input From 52553327cb06fc3ba03987d32049d1449b9979e8 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Wed, 15 May 2019 18:34:40 -0700 Subject: [PATCH 025/218] [deploy_pfc_pktgen] Ensure destination directory exists before copying pfc_gen_file (#913) --- .../functional_test/deploy_pfc_pktgen.yml | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml index dc592a91a4a..3ef4c0abf7c 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml @@ -1,11 +1,22 @@ -- name: Create pfc generater file in case it doesn't exist. 
- file: path=/mnt/flash/{{pfc_gen_file}} state=touch - delegate_to: "{{peer_mgmt}}" - become: true - when: peer_hwsku | search("Arista") or peer_hwsku | search("arista") +- block: + - name: Ensure destination directory exists on fanout + file: + path: "/mnt/flash/" + state: directory + delegate_to: "{{peer_mgmt}}" + become: true + + - name: Create pfc generator file in case it doesn't exist. + file: + path: "/mnt/flash/{{pfc_gen_file}}" + state: touch + delegate_to: "{{peer_mgmt}}" + become: true -- name: Deploy PFC generator to the fanout switch - copy: src=roles/test/files/helpers/{{pfc_gen_file}} dest=/mnt/flash - delegate_to: "{{peer_mgmt}}" - become: true + - name: Deploy PFC generator to the fanout switch + copy: + src: "roles/test/files/helpers/{{pfc_gen_file}}" + dest: "/mnt/flash" + delegate_to: "{{peer_mgmt}}" + become: true when: peer_hwsku | search("Arista") or peer_hwsku | search("arista") From cc9aa3efbfbe6369251cbee54671ccc6baff88a0 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 17 May 2019 09:29:27 +0800 Subject: [PATCH 026/218] [deploy] Wait for vEOS to come back after restart (#906) * [deploy] Wait for vEOS to come back after restart * [deploy] Replace handlers with tasks to ensure execution sequence * [deploy] Replace ping cmd with wait_for module * [deploy] Remove unused handlers --- ansible/roles/eos/handlers/main.yml | 3 --- ansible/roles/eos/tasks/main.yml | 22 +++++++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) delete mode 100644 ansible/roles/eos/handlers/main.yml diff --git a/ansible/roles/eos/handlers/main.yml b/ansible/roles/eos/handlers/main.yml deleted file mode 100644 index ddd945ccf1b..00000000000 --- a/ansible/roles/eos/handlers/main.yml +++ /dev/null @@ -1,3 +0,0 @@ -- name: Restart the box - command: /sbin/shutdown -r now "Ansible updates triggered" - diff --git a/ansible/roles/eos/tasks/main.yml b/ansible/roles/eos/tasks/main.yml index 09cb3a56542..b3e432749c9 100644 --- a/ansible/roles/eos/tasks/main.yml 
+++ b/ansible/roles/eos/tasks/main.yml @@ -50,6 +50,22 @@ - name: build a startup config template: src="{{ topo }}-{{ props.swrole }}.j2" dest=/mnt/flash/startup-config - when: configuration is defined - notify: - - Restart the box + when: hostname in configuration + +- name: Restart the box + command: /sbin/shutdown -r now "Ansible updates triggered" + when: hostname in configuration + +- name: Pause for reboot + pause: seconds=30 + when: hostname in configuration + +- name: Wait for VM to come up + wait_for: + host: "{{ ansible_ssh_host }}" + port: 22 + state: started + delay: 10 + timeout: 600 + connection: local + when: hostname in configuration From 7c4b00c16d0dc963ac2788a3a805f1f2da9921ad Mon Sep 17 00:00:00 2001 From: Andriy Moroz Date: Sun, 26 May 2019 16:06:33 +0300 Subject: [PATCH 027/218] Improved error handling when not all Interfaces are up (#853) * Improved error handling when not all Interfaces are up * Fixed PR 853. --- .../test/tasks/check_fanout_interfaces.yml | 18 ++++++ .../test/tasks/check_sw_vm_interfaces.yml | 57 +++++++++++++++++++ ansible/roles/test/tasks/interface.yml | 24 ++++++-- .../templates/show_int_portchannel_status.j2 | 1 + ansible/testbed_vm_status.yml | 7 +++ 5 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 ansible/roles/test/tasks/check_fanout_interfaces.yml create mode 100644 ansible/roles/test/tasks/check_sw_vm_interfaces.yml create mode 100644 ansible/roles/vm_set/templates/show_int_portchannel_status.j2 create mode 100755 ansible/testbed_vm_status.yml diff --git a/ansible/roles/test/tasks/check_fanout_interfaces.yml b/ansible/roles/test/tasks/check_fanout_interfaces.yml new file mode 100644 index 00000000000..4a4d4e2f43f --- /dev/null +++ b/ansible/roles/test/tasks/check_fanout_interfaces.yml @@ -0,0 +1,18 @@ +- block: + - name: Gathering lab graph facts about the device + conn_graph_facts: host={{ inventory_hostname }} + connection: local + + - name: Fanout hostname + set_fact: fanout_switch={{ 
device_conn['Ethernet0']['peerdevice'] }} + + - name: Check Fanout interfaces + local_action: shell ansible-playbook -i lab fanout.yml -l {{ fanout_switch }} --tags check_interfaces_status + ignore_errors: yes + register: fanout_interfaces_status + + - name: Debug Fanout interfaces + debug: msg={{ fanout_interfaces_status }} + when: fanout_interfaces_status is defined + + when: check_fanout is defined diff --git a/ansible/roles/test/tasks/check_sw_vm_interfaces.yml b/ansible/roles/test/tasks/check_sw_vm_interfaces.yml new file mode 100644 index 00000000000..9a09d4968e4 --- /dev/null +++ b/ansible/roles/test/tasks/check_sw_vm_interfaces.yml @@ -0,0 +1,57 @@ +- block: + - name: Get Portchannel status + shell: show interfaces portchannel + register: portchannel_status + ignore_errors: yes + + - name: Get teamd dump + shell: teamdctl '{{ item }}' state dump + with_items: "{{ minigraph_portchannels }}" + ignore_errors: yes + register: teamd_dump + when: + - minigraph_portchannels is defined + + - name: Debug teamd dump + debug: msg={{ teamd_dump }} + when: teamd_dump is defined + + - name: Define testbed_name when not obtained + set_fact: + testbed_name: "{{ inventory_hostname + '-' + topo }}" + when: testbed_name is not defined + + - name: Gathering testbed information + test_facts: testbed_name="{{ testbed_name }}" + connection: local + ignore_errors: yes + + - name: Gather vm list from Testbed server + local_action: shell ansible-playbook testbed_vm_status.yml -i veos -l "{{ testbed_facts['server'] }}" + ignore_errors: yes + register: testbed_vm_list + + - name: Debug VM list on Testbed + debug: msg={{ testbed_vm_list }} + when: testbed_vm_list is defined + + - set_fact: + vms: "{{ minigraph_devices }}" + peer_hwsku: 'Arista-VM' + + - name: Gather Port-Channel status from VMs + action: apswitch template=roles/vm_set/templates/show_int_portchannel_status.j2 + args: + host: "{{ vms[item]['mgmt_addr'] }}" + login: "{{ switch_login[hwsku_map[peer_hwsku]] }}" + 
connection: switch + ignore_errors: yes + when: vms["{{ item }}"]['hwsku'] == 'Arista-VM' + with_items: vms + register: vm_portchannel_status + + - name: Debug Port-Channel on VMs + debug: msg={{ vm_portchannel_status }} + when: vm_portchannel_status is defined + + when: check_vms is defined diff --git a/ansible/roles/test/tasks/interface.yml b/ansible/roles/test/tasks/interface.yml index bd6f8dc639f..90fb9134a77 100644 --- a/ansible/roles/test/tasks/interface.yml +++ b/ansible/roles/test/tasks/interface.yml @@ -32,12 +32,26 @@ - debug: msg="Found link down ports {{ansible_interface_link_down_ports}}" when: ansible_interface_link_down_ports | length > 0 -- name: Verify interfaces are up correctly - assert: { that: "{{ ansible_interface_link_down_ports | length }} == 0" } +- block: + - name: Verify interfaces are up correctly + assert: { that: "{{ ansible_interface_link_down_ports | length }} == 0" } + rescue: + - include: check_fanout_interfaces.yml + vars: + check_fanout: true + - debug: msg="Not all Interfaces are up" -- name: Verify port channel interfaces are up correctly - assert: { that: "'{{ ansible_interface_facts[item]['active'] }}' == 'True'" } - with_items: "{{ minigraph_portchannels.keys() }}" +- block: + - name: Verify port channel interfaces are up correctly + assert: { that: "'{{ ansible_interface_facts[item]['active'] }}' == 'True'" } + with_items: "{{ minigraph_portchannels.keys() }}" + + rescue: + - include: check_sw_vm_interfaces.yml + vars: + check_vms: true + - debug: msg="Not all PortChannels are up '{{ portchannel_status['stdout_lines'] }}' " + when: portchannel_status is defined - name: Verify VLAN interfaces are up correctly assert: { that: "'{{ ansible_interface_facts[item]['active'] }}' == 'True'" } diff --git a/ansible/roles/vm_set/templates/show_int_portchannel_status.j2 b/ansible/roles/vm_set/templates/show_int_portchannel_status.j2 new file mode 100644 index 00000000000..4a8f3b3bae3 --- /dev/null +++ 
b/ansible/roles/vm_set/templates/show_int_portchannel_status.j2 @@ -0,0 +1 @@ +show interfaces Port-Channel 1-$ status \ No newline at end of file diff --git a/ansible/testbed_vm_status.yml b/ansible/testbed_vm_status.yml new file mode 100755 index 00000000000..d809c6ac3ae --- /dev/null +++ b/ansible/testbed_vm_status.yml @@ -0,0 +1,7 @@ +- hosts: servers:&vm_host + tasks: + - name: Get VM statuses from Testbed server + shell: virsh list + register: virsh_list + - name: Show VM statuses + debug: msg="{{ virsh_list['stdout_lines'] }}" \ No newline at end of file From 1ccb7ea3310ee5f5082f9cd141a251f247e361ae Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Wed, 22 May 2019 11:09:11 +0300 Subject: [PATCH 028/218] [mlnx] upstream pfc storm dockers and playbooks to check/deploy on fanout (#909) Signed-off-by: Stepan Blyschak --- ansible/roles/fanout/tasks/main.yml | 14 +++++ .../fanout/tasks/mlnx/check_pfcwd_fanout.yml | 22 ++++++++ .../fanout/tasks/mlnx/deploy_pfcwd_fanout.yml | 54 +++++++++++++++++++ .../files/mlnx/docker-tests-pfcgen/Dockerfile | 5 ++ .../files/mlnx/docker-tests-pfcgen/Makefile | 9 ++++ .../files/mlnx/docker-tests-pfcgen/start.sh | 3 ++ .../mlnx/docker-tests-saveargs/Dockerfile | 3 ++ .../files/mlnx/docker-tests-saveargs/Makefile | 7 +++ .../mlnx/docker-tests-saveargs/save_args.sh | 3 ++ 9 files changed, 120 insertions(+) create mode 100644 ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml create mode 100644 ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml create mode 100644 ansible/roles/test/files/mlnx/docker-tests-pfcgen/Dockerfile create mode 100644 ansible/roles/test/files/mlnx/docker-tests-pfcgen/Makefile create mode 100755 ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh create mode 100644 ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile create mode 100644 ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile create mode 100755 
ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh diff --git a/ansible/roles/fanout/tasks/main.yml b/ansible/roles/fanout/tasks/main.yml index 610a20fd538..1eae1f3f8e0 100644 --- a/ansible/roles/fanout/tasks/main.yml +++ b/ansible/roles/fanout/tasks/main.yml @@ -30,3 +30,17 @@ - include: rootfanout_connect.yml deploy_leaf=true when: sw_type == 'FanoutLeaf' + + ################################################################### + # build, deploy and start docker images for the PFC WD test # + ################################################################### +- include: mlnx/deploy_pfcwd_fanout.yml + when: peer_hwsku == "MLNX-OS" + tags: deploy,pfcwd_config + + ################################################################### + # check and recover docker images for the PFC WD test # + ################################################################### +- include: mlnx/check_pfcwd_fanout.yml + when: peer_hwsku == "MLNX-OS" + tags: check_pfcwd_config diff --git a/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml new file mode 100644 index 00000000000..56ebefadb9e --- /dev/null +++ b/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml @@ -0,0 +1,22 @@ +############################################################################################## +### sub-playbook to deploy the docker images needed for the pfcwd test to fanout swtich +### to run separately: +### ansible-playbook -i lab fanout.yml -l ${FANOUT} --become --tags check_pfcwd_config -vvvv +################################################################################################ + +- name: Load and start dockers + action: apswitch template=mlnx_check_pfcwd_fanout.j2 + connection: switch + register: output + args: + login: "{{ switch_login['MLNX-OS'] }}" + +- set_fact: + dockers_running: "{{output.stdout|search(\"args *storm_args\")|bool}}" + dockers_installed: "{{output.stdout|search(\"pfc_storm\") and 
output.stdout|search(\"storm_args\")|bool}}" + +- debug: + msg: "Dockers installed{{':'}} {{dockers_installed}}" + +- debug: + msg: "Dockers running {{':'}} {{dockers_running}}" diff --git a/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml new file mode 100644 index 00000000000..ef1d47ff825 --- /dev/null +++ b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml @@ -0,0 +1,54 @@ +############################################################################################## +### sub-playbook to deploy the docker images needed for the pfcwd test to fanout swtich +### to run separately: +### ansible-playbook -i lab fanout.yml -l ${FANOUT} --become --tags pfcwd_config -vvvv +### Optionally "-e pfcwd_dockers_url=" can be specified to fetch dockers without +### building them. This is useful to save time or run task in sonic-mgmt docker. +### E.g. +### ansible-playbook -i lab fanout.yml -l ${FANOUT} -e pfcwd_dockers_url=http://arc-build-server/sonic/ --become --tags pfcwd_config -vvvv +################################################################################################ + +- set_fact: + fanout_addr: "{{device_info['mgmtip']}}" + ansible_ssh_user: "{{fanout_root_user}}" + ansible_ssh_pass: "{{fanout_root_pass}}" + pfcwd_dockers: "['roles/test/files/mlnx/docker-tests-pfcgen/pfc_storm.tgz', 'roles/test/files/mlnx/docker-tests-saveargs/storm_args.tgz']" + fanout_img_path: "/var/opt/tms/images/" + +- name: Build containers to save storm arguments and to run storm + command: make + args: + chdir: "{{item | dirname}}" + with_items: pfcwd_dockers + delegate_to: localhost + when: pfcwd_dockers_url is not defined + +- name: Copy test match and ignore files to switch + copy: + src: "{{ item }}" + dest: "{{fanout_img_path}}" + with_items: pfcwd_dockers + when: pfcwd_dockers_url is not defined + +- name: Download pre-built pfcwd dockers if path specified + get_url: url={{pfcwd_dockers_url}}{{item | 
basename}} dest={{fanout_img_path}}/{{item | basename}} + with_items: pfcwd_dockers + when: pfcwd_dockers_url is defined + +- block: + - name: Mount FS to read-write + command: mount -o remount, rw / + + - name: Update storage driver for Docker + command: 'sed -i s/\"storage-driver\":\ \"vfs\",/\"storage-driver\":\ \"devicemapper\",\\n\ \ \ \ \"storage-opts\":\ [\\n\ \ \ \ \ \ \ \ \"dm.fs=ext4\"\\n\ \ \ \ ],\/g /opt/tms/bin/docker_config.json' + + always: + - name: Remount FS back to read-only + command: mount -r -o remount / + +- name: Load and start dockers + action: apswitch template=mlnx_deploy_pfcwd_fanout.j2 + connection: switch + args: + timeout: 600 + login: "{{ switch_login['MLNX-OS'] }}" diff --git a/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Dockerfile b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Dockerfile new file mode 100644 index 00000000000..666d7fc18ce --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Dockerfile @@ -0,0 +1,5 @@ +FROM python:2.7-jessie + +COPY ["start.sh", "pfc_gen.py", "/root/"] + +CMD ["/root/start.sh"] diff --git a/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Makefile b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Makefile new file mode 100644 index 00000000000..62c0ff80daf --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/Makefile @@ -0,0 +1,9 @@ +all: save + +build: Dockerfile + cp ../../helpers/pfc_gen.py . + docker build -t pfc_storm . 
+ rm ./pfc_gen.py + +save: build + docker save pfc_storm:latest | gzip >pfc_storm.tgz diff --git a/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh new file mode 100755 index 00000000000..250a670d687 --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +/root/pfc_gen.py `cat /storm/args` diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile new file mode 100644 index 00000000000..9a84e876df8 --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile @@ -0,0 +1,3 @@ +FROM debian:jessie + +COPY ./save_args.sh /root/ diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile new file mode 100644 index 00000000000..d6b834e804a --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile @@ -0,0 +1,7 @@ +all: save + +build: Dockerfile + docker build -t storm_args . 
+ +save: build + docker save storm_args:latest | gzip >storm_args.tgz diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh b/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh new file mode 100755 index 00000000000..dab1533c029 --- /dev/null +++ b/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +echo $@ >/storm/args From ad550fe2ba5061b67f90a524bd4b3184794277f9 Mon Sep 17 00:00:00 2001 From: yvolynets-mlnx <50697593+yvolynets-mlnx@users.noreply.github.com> Date: Tue, 4 Jun 2019 10:41:55 +0300 Subject: [PATCH 029/218] Move image processing from advanced-reboot.yml to separate file (#930) * Moved image processing from advanced-reboot.yml to separate file reboot-image-handle.yml * Moved image processing from advanced-reboot.yml to separate file reboot-image-handle.yml --- ansible/roles/test/tasks/advanced-reboot.yml | 45 +---------------- .../advanced_reboot/reboot-image-handle.yml | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+), 43 deletions(-) create mode 100755 ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 9643737f0a1..7a9973e11c7 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -113,49 +113,8 @@ stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" - - block: - - - name: Save image version - shell: 'sonic_installer list | grep Current | cut -f2 -d " "' - register: current_sonic_image - become: true - when: not stay_in_target_image - - - name: Generate temp file name on target device - shell: mktemp - register: tempfile - - - set_fact: - new_image_location: '{{ tempfile.stdout }}' - - - debug: msg='Setting image file name to {{ new_image_location }}' - - - name: Download 
SONiC image. - local_action: get_url url={{ new_sonic_image }} dest={{ new_image_location }} - - - name: Upload SONiC image to device. - copy: - src: "{{ new_image_location }}" - dest: "{{ new_image_location }}" - - - name: Cleanup sonic images that is not current and/or next - shell: sonic_installer cleanup -y - become: true - when: cleanup_old_sonic_images - - - name: 'Setup restoring initial image {{ current_sonic_image }}' - shell: /bin/true - connection: local - notify: - - restore current image - - reboot sonic - when: not stay_in_target_image - - - name: Installing new SONiC image - shell: sonic_installer install -y {{ new_image_location }} - become: true - - when: new_sonic_image | default('') | length > 0 + - include: advanced_reboot/reboot-image-handle.yml + when: new_sonic_image is defined - include: ptf_runner.yml vars: diff --git a/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml b/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml new file mode 100755 index 00000000000..ed5f5d8a99c --- /dev/null +++ b/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml @@ -0,0 +1,49 @@ +- block: + - fail: msg="Please set new_sonic_image variable" + when: new_sonic_image is not defined + + - fail: msg="Please set cleanup_old_sonic_images variable" + when: cleanup_old_sonic_images is not defined + + - fail: msg="Please set stay_in_target_image variable" + when: stay_in_target_image is not defined + + - name: Save image version + shell: 'sonic_installer list | grep Current | cut -f2 -d " "' + register: current_sonic_image + become: true + when: not stay_in_target_image + + - name: Generate temp file name on target device + shell: mktemp + register: tempfile + + - set_fact: + new_image_location: '{{ tempfile.stdout }}' + + - debug: msg='Setting image file name to {{ new_image_location }}' + + - name: Download SONiC image. 
+ local_action: get_url url={{ new_sonic_image }} dest={{ new_image_location }} + + - name: Upload SONiC image to device. + copy: + src: "{{ new_image_location }}" + dest: "{{ new_image_location }}" + + - name: Cleanup sonic images that is not current and/or next + shell: sonic_installer cleanup -y + become: true + when: cleanup_old_sonic_images + + - name: 'Setup restoring initial image {{ current_sonic_image }}' + shell: /bin/true + connection: local + notify: + - restore current image + - reboot sonic + when: not stay_in_target_image + + - name: Installing new SONiC image + shell: sonic_installer install -y {{ new_image_location }} + become: true From 80e2c2b185845dd9c5ac46934f905b28dbe3f423 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Tue, 4 Jun 2019 18:19:36 -0700 Subject: [PATCH 030/218] [dhcp_relay] More detailed crafting and strict testing of OFFER and ACK packets (#924) --- .../test/files/ptftests/dhcp_relay_test.py | 146 +++++++++++++++--- 1 file changed, 121 insertions(+), 25 deletions(-) diff --git a/ansible/roles/test/files/ptftests/dhcp_relay_test.py b/ansible/roles/test/files/ptftests/dhcp_relay_test.py index 6ae050df941..358d811805a 100644 --- a/ansible/roles/test/files/ptftests/dhcp_relay_test.py +++ b/ansible/roles/test/files/ptftests/dhcp_relay_test.py @@ -214,6 +214,46 @@ def create_dhcp_offer_packet(self): dhcp_lease=self.LEASE_TIME, padding_bytes=0) + def create_dhcp_offer_relayed_packet(self): + my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) + + # Relay modifies the DHCPOFFER message in the following ways: + # 1.) Replaces the source MAC with the MAC of the interface it received it on + # 2.) Replaces the destination MAC with boradcast (ff:ff:ff:ff:ff:ff) + # 3.) Replaces the source IP with the IP of the interface which the relay + # received it on + # 4.) Replaces the destination IP with broadcast (255.255.255.255) + # 5.) 
Replaces the destination port with the DHCP client port (68) + ether = scapy.Ether(dst=self.BROADCAST_MAC, src=self.relay_iface_mac, type=0x0800) + ip = scapy.IP(src=self.relay_iface_ip, dst=self.BROADCAST_IP, len=290, ttl=64) + udp = scapy.UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT, len=262) + bootp = scapy.BOOTP(op=2, + htype=1, + hlen=6, + hops=0, + xid=0, + secs=0, + flags=0, + ciaddr=self.DEFAULT_ROUTE_IP, + yiaddr=self.client_ip, + siaddr=self.server_ip, + giaddr=self.relay_iface_ip, + chaddr=my_chaddr) + bootp /= scapy.DHCP(options=[('message-type', 'offer'), + ('server_id', self.server_ip), + ('lease_time', self.LEASE_TIME), + ('subnet_mask', self.client_subnet), + ('end')]) + + # TODO: Need to add this to the packet creation functions in PTF code first! + # If our bootp layer is too small, pad it + #pad_bytes = self.DHCP_PKT_BOOTP_MIN_LEN - len(bootp) + #if pad_bytes > 0: + # bootp /= scapy.PADDING('\x00' * pad_bytes) + + pkt = ether / ip / udp / bootp + return pkt + def create_dhcp_request_packet(self): return testutils.dhcp_request_packet(eth_client=self.client_mac, ip_server=self.server_ip, @@ -272,6 +312,47 @@ def create_dhcp_ack_packet(self): dhcp_lease=self.LEASE_TIME, padding_bytes=0) + def create_dhcp_ack_relayed_packet(self): + my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) + + # Relay modifies the DHCPACK message in the following ways: + # 1.) Replaces the source MAC with the MAC of the interface it received it on + # 2.) Replaces the destination MAC with boradcast (ff:ff:ff:ff:ff:ff) + # 3.) Replaces the source IP with the IP of the interface which the relay + # received it on + # 4.) Replaces the destination IP with broadcast (255.255.255.255) + # 5.) 
Replaces the destination port with the DHCP client port (68) + ether = scapy.Ether(dst=self.BROADCAST_MAC, src=self.relay_iface_mac, type=0x0800) + ip = scapy.IP(src=self.relay_iface_ip, dst=self.BROADCAST_IP, len=290, ttl=64) + udp = scapy.UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT, len=262) + bootp = scapy.BOOTP(op=2, + htype=1, + hlen=6, + hops=0, + xid=0, + secs=0, + flags=0, + ciaddr=self.DEFAULT_ROUTE_IP, + yiaddr=self.client_ip, + siaddr=self.server_ip, + giaddr=self.relay_iface_ip, + chaddr=my_chaddr) + bootp /= scapy.DHCP(options=[('message-type', 'ack'), + ('server_id', self.server_ip), + ('lease_time', self.LEASE_TIME), + ('subnet_mask', self.client_subnet), + ('end')]) + + # TODO: Need to add this to the packet creation functions in PTF code first! + # If our bootp layer is too small, pad it + #pad_bytes = self.DHCP_PKT_BOOTP_MIN_LEN - len(bootp) + #if pad_bytes > 0: + # bootp /= scapy.PADDING('\x00' * pad_bytes) + + pkt = ether / ip / udp / bootp + return pkt + + """ Send/receive functions @@ -318,9 +399,10 @@ def verify_relayed_discover(self): masked_discover.set_do_not_care_scapy(scapy.PADDING, "load") # Count the number of these packets received on the ports connected to our leaves + num_expected_packets = self.num_dhcp_servers discover_count = testutils.count_matched_packets_all_ports(self, masked_discover, self.server_port_indices) - self.assertTrue(discover_count == self.num_dhcp_servers, - "Failed: Discover count of %d != %d (num_dhcp_servers)" % (discover_count, self.num_dhcp_servers)) + self.assertTrue(discover_count == num_expected_packets, + "Failed: Discover count of %d != %d" % (discover_count, num_expected_packets)) # Simulate a DHCP server sending a DHCPOFFER message to client. 
# We do this by injecting a DHCPOFFER message on the link connected to one @@ -331,24 +413,31 @@ def server_send_offer(self): # Verify that the DHCPOFFER would be received by our simulated client def verify_offer_received(self): - dhcp_offer = self.create_dhcp_offer_packet() + dhcp_offer = self.create_dhcp_offer_relayed_packet() masked_offer = Mask(dhcp_offer) - masked_offer.set_do_not_care_scapy(scapy.Ether, "src") - masked_offer.set_do_not_care_scapy(scapy.Ether, "dst") + masked_offer.set_do_not_care_scapy(scapy.IP, "version") + masked_offer.set_do_not_care_scapy(scapy.IP, "ihl") + masked_offer.set_do_not_care_scapy(scapy.IP, "tos") + masked_offer.set_do_not_care_scapy(scapy.IP, "len") + masked_offer.set_do_not_care_scapy(scapy.IP, "id") + masked_offer.set_do_not_care_scapy(scapy.IP, "flags") + masked_offer.set_do_not_care_scapy(scapy.IP, "frag") + masked_offer.set_do_not_care_scapy(scapy.IP, "ttl") + masked_offer.set_do_not_care_scapy(scapy.IP, "proto") masked_offer.set_do_not_care_scapy(scapy.IP, "chksum") - masked_offer.set_do_not_care_scapy(scapy.IP, "src") - masked_offer.set_do_not_care_scapy(scapy.IP, "dst") + masked_offer.set_do_not_care_scapy(scapy.IP, "options") + masked_offer.set_do_not_care_scapy(scapy.UDP, "len") masked_offer.set_do_not_care_scapy(scapy.UDP, "chksum") - masked_offer.set_do_not_care_scapy(scapy.UDP, "dport") - # Mask out lease time since it can change depending on when the server receives the request - # Lease time in ack can be slightly different than in offer, since lease time varies slightly - # We also want to ignore the checksums since they will vary a bit depending on the timestamp - # Offset is byte 292, 6 byte field, set_do_not_care() expects values in bits - masked_offer.set_do_not_care((self.DHCP_LEASE_TIME_OFFSET * 8), (self.DHCP_LEASE_TIME_LEN * 8)) + masked_offer.set_do_not_care_scapy(scapy.BOOTP, "sname") + masked_offer.set_do_not_care_scapy(scapy.BOOTP, "file") + + masked_offer.set_do_not_care_scapy(scapy.DHCP, 
"lease_time") + + #masked_offer.set_do_not_care_scapy(scapy.PADDING, "load") # NOTE: verify_packet() will fail for us via an assert, so no need to check a return value here testutils.verify_packet(self, masked_offer, self.client_port_index) @@ -390,9 +479,10 @@ def verify_relayed_request(self): masked_request.set_do_not_care_scapy(scapy.BOOTP, "file") # Count the number of these packets received on the ports connected to our leaves + num_expected_packets = self.num_dhcp_servers request_count = testutils.count_matched_packets_all_ports(self, masked_request, self.server_port_indices) - self.assertTrue(request_count == self.num_dhcp_servers, - "Failed: Request count of %d != %d (num_dhcp_servers)" % (request_count, self.num_dhcp_servers)) + self.assertTrue(request_count == num_expected_packets, + "Failed: Request count of %d != %d" % (request_count, num_expected_packets)) # Simulate a DHCP server sending a DHCPOFFER message to client from one of our leaves def server_send_ack(self): @@ -401,23 +491,29 @@ def server_send_ack(self): # Verify that the DHCPACK would be received by our simulated client def verify_ack_received(self): - dhcp_ack = self.create_dhcp_ack_packet() + dhcp_ack = self.create_dhcp_ack_relayed_packet() - # Mask out lease time, ip checksum, udp checksum (explanation above) masked_ack = Mask(dhcp_ack) - masked_ack.set_do_not_care_scapy(scapy.Ether, "src") - masked_ack.set_do_not_care_scapy(scapy.Ether, "dst") - + masked_ack.set_do_not_care_scapy(scapy.IP, "version") + masked_ack.set_do_not_care_scapy(scapy.IP, "ihl") + masked_ack.set_do_not_care_scapy(scapy.IP, "tos") + masked_ack.set_do_not_care_scapy(scapy.IP, "len") + masked_ack.set_do_not_care_scapy(scapy.IP, "id") + masked_ack.set_do_not_care_scapy(scapy.IP, "flags") + masked_ack.set_do_not_care_scapy(scapy.IP, "frag") + masked_ack.set_do_not_care_scapy(scapy.IP, "ttl") + masked_ack.set_do_not_care_scapy(scapy.IP, "proto") masked_ack.set_do_not_care_scapy(scapy.IP, "chksum") - 
masked_ack.set_do_not_care_scapy(scapy.IP, "src") - masked_ack.set_do_not_care_scapy(scapy.IP, "dst") + masked_ack.set_do_not_care_scapy(scapy.IP, "options") + masked_ack.set_do_not_care_scapy(scapy.UDP, "len") masked_ack.set_do_not_care_scapy(scapy.UDP, "chksum") - masked_ack.set_do_not_care_scapy(scapy.UDP, "dport") - # Also mask out lease time (see comment in verify_offer_received() above) - masked_ack.set_do_not_care((self.DHCP_LEASE_TIME_OFFSET * 8), (self.DHCP_LEASE_TIME_LEN * 8)) + masked_ack.set_do_not_care_scapy(scapy.BOOTP, "sname") + masked_ack.set_do_not_care_scapy(scapy.BOOTP, "file") + + masked_ack.set_do_not_care_scapy(scapy.DHCP, "lease_time") # NOTE: verify_packet() will fail for us via an assert, so no need to check a return value here testutils.verify_packet(self, masked_ack, self.client_port_index) From 57a0ab2fd1664b6d098625bbe5396a650fb4b69b Mon Sep 17 00:00:00 2001 From: neethajohn <48968228+neethajohn@users.noreply.github.com> Date: Tue, 4 Jun 2019 15:15:56 -0700 Subject: [PATCH 031/218] Extend warm-reboot test to include the BGP sad path (#926) * Extend warm-reboot test to include the BGP sad pass --- .../test/files/ptftests/advanced-reboot.py | 131 +++++++++++-- ansible/roles/test/files/ptftests/arista.py | 74 ++++++- ansible/roles/test/files/ptftests/sad_path.py | 183 ++++++++++++++++++ ansible/roles/test/tasks/advanced-reboot.yml | 107 ++++------ .../roles/test/tasks/ptf_runner_reboot.yml | 104 ++++++++++ ansible/roles/test/tasks/warm-reboot-sad.yml | 11 ++ ansible/roles/test/vars/testcases.yml | 7 + 7 files changed, 518 insertions(+), 99 deletions(-) create mode 100644 ansible/roles/test/files/ptftests/sad_path.py create mode 100644 ansible/roles/test/tasks/ptf_runner_reboot.yml create mode 100644 ansible/roles/test/tasks/warm-reboot-sad.yml diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 225aa0bba4f..2f11102a15a 100644 --- 
a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -65,6 +65,7 @@ import itertools from arista import Arista +import sad_path as sp class StateMachine(): @@ -119,6 +120,8 @@ def __init__(self): self.cli_info = {} self.logs_info = {} self.log_lock = threading.RLock() + self.vm_handle = None + self.pre_handle = None self.test_params = testutils.test_params_get() self.check_param('verbose', False, required=False) self.check_param('dut_username', '', required=True) @@ -139,8 +142,15 @@ def __init__(self): self.check_param('min_bgp_gr_timeout', 15, required=False) self.check_param('warm_up_timeout_secs', 180, required=False) self.check_param('dut_stabilize_secs', 20, required=False) + self.check_param('preboot_files', None, required = False) + self.check_param('preboot_oper', None, required = False) + if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None': + self.test_params['preboot_oper'] = None - self.log_file_name = '/tmp/%s.log' % self.test_params['reboot_type'] + if self.test_params['preboot_oper'] is not None: + self.log_file_name = '/tmp/%s-%s.log' % (self.test_params['reboot_type'], self.test_params['preboot_oper']) + else: + self.log_file_name = '/tmp/%s.log' % self.test_params['reboot_type'] self.log_fp = open(self.log_file_name, 'w') # Default settings @@ -158,7 +168,7 @@ def __init__(self): # Inter-packet interval, to be used in send_in_background method. # Improve this interval to gain more precision of disruptions. 
self.send_interval = 0.0035 - self.packets_to_send = min(int(self.time_to_listen / (self.send_interval + 0.0015)), 45000) # How many packets to be sent in send_in_background method + self.packets_to_send = min(int(self.time_to_listen / (self.send_interval + 0.0015)), 45000) # How many packets to be sent in send_in_background method # Thread pool for background watching operations self.pool = ThreadPool(processes=3) @@ -279,24 +289,101 @@ def generate_arp_responder_conf(self, vlan_host_map): def dump_arp_responder_config(self, dump): # save data for arp_replay process - with open("/tmp/from_t1.json", "w") as fp: + filename = "/tmp/from_t1.json" if self.preboot_oper is None else "/tmp/from_t1_%s.json" % self.preboot_oper + with open(filename, "w") as fp: json.dump(dump, fp) + def get_peer_dev_info(self): + content = self.read_json('peer_dev_info') + for key in content.keys(): + if 'ARISTA' in key: + self.vm_dut_map[key] = dict() + self.vm_dut_map[key]['mgmt_addr'] = content[key]['mgmt_addr'] + # initialize all the port mapping + self.vm_dut_map[key]['dut_ports'] = [] + self.vm_dut_map[key]['neigh_ports'] = [] + self.vm_dut_map[key]['ptf_ports'] = [] + + def get_portchannel_info(self): + content = self.read_json('portchannel_ports_file') + for key in content.keys(): + for member in content[key]['members']: + for vm_key in self.vm_dut_map.keys(): + if member in self.vm_dut_map[vm_key]['dut_ports']: + self.vm_dut_map[vm_key]['dut_portchannel'] = key + self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel 1' + break + + def get_neigh_port_info(self): + content = self.read_json('neigh_port_info') + for key in content.keys(): + if content[key]['name'] in self.vm_dut_map.keys(): + self.vm_dut_map[content[key]['name']]['dut_ports'].append(key) + self.vm_dut_map[content[key]['name']]['neigh_ports'].append(content[key]['port']) + self.vm_dut_map[content[key]['name']]['ptf_ports'].append(self.port_indices[key]) + + def build_peer_mapping(self): + ''' + Builds a map of 
the form + 'ARISTA01T1': {'mgmt_addr': + 'neigh_portchannel' + 'dut_portchannel' + 'neigh_ports' + 'dut_ports' + 'ptf_ports' + } + ''' + self.vm_dut_map = {} + for file in self.test_params['preboot_files'].split(','): + self.test_params[file] = '/tmp/' + file + '.json' + self.get_peer_dev_info() + self.get_neigh_port_info() + self.get_portchannel_info() + def setUp(self): + self.fails['dut'] = set() self.port_indices = self.read_port_indices() self.portchannel_ports = self.read_portchannel_ports() self.vlan_ports = self.read_vlan_ports() + if self.test_params['preboot_oper'] is not None: + self.build_peer_mapping() self.vlan_ip_range = self.test_params['vlan_ip_range'] self.default_ip_range = self.test_params['default_ip_range'] self.limit = datetime.timedelta(seconds=self.test_params['reboot_limit_in_seconds']) self.reboot_type = self.test_params['reboot_type'] + self.preboot_oper = self.test_params['preboot_oper'] if self.reboot_type not in ['fast-reboot', 'warm-reboot']: raise ValueError('Not supported reboot_type %s' % self.reboot_type) self.dut_ssh = self.test_params['dut_username'] + '@' + self.test_params['dut_hostname'] self.dut_mac = self.test_params['dut_mac'] + # get VM info + arista_vms = self.test_params['arista_vms'][1:-1].split(",") + self.ssh_targets = [] + for vm in arista_vms: + if (vm.startswith("'") or vm.startswith('"')) and (vm.endswith("'") or vm.endswith('"')): + self.ssh_targets.append(vm[1:-1]) + else: + self.ssh_targets.append(vm) + + self.log("Converted addresses VMs: %s" % str(self.ssh_targets)) + if self.preboot_oper is not None: + self.log("Preboot Operations:") + self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh) + (self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails_dut, fails_vm) = self.pre_handle.setup() + self.fails['dut'] |= fails_dut + self.fails[self.neigh_vm] = fails_vm + for log in log_info: + self.log(log) + 
log_info, fails_dut, fails_vm = self.pre_handle.verify() + self.fails['dut'] |= fails_dut + self.fails[self.neigh_vm] |= fails_vm + for log in log_info: + self.log(log) + self.log(" ") + self.vlan_host_map = self.generate_vlan_servers() arp_responder_conf = self.generate_arp_responder_conf(self.vlan_host_map) self.dump_arp_responder_config(arp_responder_conf) @@ -327,6 +414,7 @@ def setUp(self): self.generate_arp_ping_packet() if self.reboot_type == 'warm-reboot': + self.log("Preboot Oper: %s" % self.preboot_oper) # Pre-generate list of packets to be sent in send_in_background method. generate_start = datetime.datetime.now() self.generate_bidirectional() @@ -385,7 +473,7 @@ def generate_from_t1(self): dst_addr = server_ip # generate source MAC address for traffic based on LAG_BASE_MAC_PATTERN - mac_addr = self.hex_to_mac(self.LAG_BASE_MAC_PATTERN.format(counter)) + mac_addr = self.hex_to_mac(self.LAG_BASE_MAC_PATTERN.format(counter)) packet = simple_tcp_packet(eth_src=mac_addr, eth_dst=self.dut_mac, @@ -507,18 +595,8 @@ def runTest(self): no_routing_stop = None no_cp_replies = None - arista_vms = self.test_params['arista_vms'][1:-1].split(",") - ssh_targets = [] - for vm in arista_vms: - if (vm.startswith("'") or vm.startswith('"')) and (vm.endswith("'") or vm.endswith('"')): - ssh_targets.append(vm[1:-1]) - else: - ssh_targets.append(vm) - - self.log("Converted addresses VMs: %s" % str(ssh_targets)) - self.ssh_jobs = [] - for addr in ssh_targets: + for addr in self.ssh_targets: q = Queue.Queue() thr = threading.Thread(target=self.peer_state_check, kwargs={'ip': addr, 'queue': q}) thr.setDaemon(True) @@ -529,8 +607,6 @@ def runTest(self): thr.setDaemon(True) try: - self.fails['dut'] = set() - self.log("Starting reachability state watch thread...") self.watching = True self.light_probe = False @@ -648,12 +724,29 @@ def wait_for_ssh_threads(): self.fails['dut'].add("%s cycle must be less than graceful limit %s seconds" % (self.reboot_type, 
self.test_params['graceful_limit'])) if self.reboot_type == 'fast-reboot' and no_cp_replies < 0.95 * self.nr_vl_pkts: self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) + + if self.reboot_type == 'warm-reboot' and self.preboot_oper is not None: + if self.pre_handle is not None: + self.log("Postboot checks:") + log_info, fails_dut, fails_vm = self.pre_handle.verify() + self.fails[self.neigh_vm] |= fails_vm + self.fails['dut'] |= fails_dut + for log in log_info: + self.log(log) + self.log(" ") + except Exception as e: self.fails['dut'].add(e) finally: # Stop watching DUT self.watching = False + # revert to pretest state + if self.preboot_oper is not None and self.pre_handle is not None: + self.log("Revert to preboot state:") + self.pre_handle.revert() + self.log(" ") + # Generating report self.log("="*50) self.log("Report:") @@ -829,7 +922,7 @@ def sniff_in_background(self, wait = None): self.sniffer_started.clear() def save_sniffed_packets(self): - filename = '/tmp/capture.pcap' + filename = "/tmp/capture_%s.pcap" % self.preboot_oper if self.preboot_oper is not None else "/tmp/capture.pcap" if self.packets: scapyall.wrpcap(filename, self.packets) self.log("Pcap file dumped to %s" % filename) @@ -963,7 +1056,7 @@ def examine_flow(self, filename = None): self.log("Gaps in forwarding not found.") self.log("Total incoming packets captured %d" % received_counter) if packets: - filename = '/tmp/capture_filtered.pcap' + filename = '/tmp/capture_filtered.pcap' if self.preboot_oper is None else "/tmp/capture_filtered_%s.pcap" % self.preboot_oper scapyall.wrpcap(filename, packets) self.log("Filtered pcap dumped to %s" % filename) diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index d26de63068b..3ddbf1bb178 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -41,7 +41,7 @@ def 
__init__(self, ip, queue, test_params, login='admin', password='123456'): self.login = login self.password = password self.conn = None - self.hostname = None + self.arista_prompt = None self.v4_routes = [test_params['vlan_ip_range'], test_params['lo_prefix']] self.v6_routes = [test_params['lo_v6_prefix']] self.fails = set() @@ -58,27 +58,28 @@ def connect(self): self.shell = self.conn.invoke_shell() first_prompt = self.do_cmd(None, prompt = '>') - self.hostname = self.extract_hostname(first_prompt) + self.arista_prompt = self.get_arista_prompt(first_prompt) self.do_cmd('enable') self.do_cmd('terminal length 0') return self.shell - def extract_hostname(self, first_prompt): + def get_arista_prompt(self, first_prompt): lines = first_prompt.split('\n') prompt = lines[-1] - return prompt.strip().replace('>', '#') + # match all modes - A#, A(config)#, A(config-if)# + return prompt.strip().replace('>', '.*#') def do_cmd(self, cmd, prompt = None): if prompt == None: - prompt = self.hostname + prompt = self.arista_prompt if cmd is not None: self.shell.send(cmd + '\n') input_buffer = '' - while prompt not in input_buffer: + while re.search(prompt, input_buffer) is None: input_buffer += self.shell.recv(16384) return input_buffer @@ -288,6 +289,21 @@ def parse_bgp_neighbor_once(self, output): return is_gr_ipv4_enabled, is_gr_ipv6_enabled, restart_time + def parse_bgp_info(self, output): + neigh_bgp = None + dut_bgp = None + asn = None + for line in output.split('\n'): + if 'BGP neighbor is' in line: + dut_bgp = re.findall('BGP neighbor is (.*?),', line)[0] + elif 'Local AS is' in line: + asn = re.findall('Local AS is (\d+?),', line)[0] + elif 'Local TCP address is' in line: + neigh_bgp = re.findall('Local TCP address is (.*?),', line)[0] + break + + return neigh_bgp, dut_bgp, asn + def parse_bgp_neighbor(self, output): gr_active = None gr_timer = None @@ -313,6 +329,52 @@ def parse_bgp_route(self, output, expects): return set(expects) == prefixes + def get_bgp_info(self): + # 
Retreive BGP info (peer addr, AS) for the dut and neighbor + neigh_bgp = {} + dut_bgp = {} + for cmd, ver in [('show ip bgp neighbors', 'v4'), ('show ipv6 bgp neighbors', 'v6')]: + output = self.do_cmd(cmd) + if ver == 'v6': + neigh_bgp[ver], dut_bgp[ver], neigh_bgp['asn'] = self.parse_bgp_info(output) + else: + neigh_bgp[ver], dut_bgp[ver], neigh_bgp['asn'] = self.parse_bgp_info(output) + + return neigh_bgp, dut_bgp + + def change_bgp_neigh_state(self, asn, is_up=True): + state = ['shut', 'no shut'] + self.do_cmd('configure') + self.do_cmd('router bgp %s' % asn) + self.do_cmd('%s' % state[is_up]) + self.do_cmd('exit') + self.do_cmd('exit') + + def verify_bgp_neigh_state(self, dut=None, state="Active"): + bgp_state = {} + bgp_state['v4'] = bgp_state['v6'] = False + for cmd, ver in [('show ip bgp summary | json', 'v4'), ('show ipv6 bgp summary | json', 'v6')]: + output = self.do_cmd(cmd) + data = '\n'.join(output.split('\r\n')[1:-1]) + obj = json.loads(data) + + if state != 'Active': + if 'vrfs' in obj: + # return True when obj['vrfs'] is empty which is the case when the bgp state is 'down' + bgp_state[ver] = not obj['vrfs'] + else: + self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver) + else: + if 'vrfs' in obj and 'default' in obj['vrfs']: + obj = obj['vrfs']['default'] + if 'peers' in obj: + bgp_state[ver] = (obj['peers'][dut[ver]]['peerState'] == state) + else: + self.fails.add('Verify BGP %S neighbor: Peer attribute missing in output' % ver) + else: + self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver) + return self.fails, bgp_state + def check_gr_peer_status(self, output): # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 if not self.ipv4_gr_enabled: diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py new file mode 100644 index 00000000000..ccae8ad0878 --- /dev/null +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -0,0 
+1,183 @@ +import datetime +import re +import subprocess +import time + +from arista import Arista + + +class PrebootTest(object): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh): + self.oper_type = oper_type + self.vm_list = vm_list + self.portchannel_ports = portchannel_ports + self.vm_dut_map = vm_dut_map + self.test_args = test_args + self.dut_ssh = dut_ssh + self.fails_vm = set() + self.fails_dut = set() + self.log = [] + self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh) + + def setup(self): + if 'bgp' in self.oper_type: + self.shandle.sad_setup(is_up=False) + return self.shandle.retreive_test_info(), self.shandle.retreive_logs() + + def verify(self): + self.shandle.sad_bgp_verify() + return self.shandle.retreive_logs() + + def revert(self): + self.shandle.sad_setup() + return self.shandle.retreive_logs() + + +class SadPath(object): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args): + self.oper_type = oper_type + self.vm_list = vm_list + self.portchannel_ports = portchannel_ports + self.vm_dut_map = vm_dut_map + self.test_args = test_args + self.neigh_vm = None + self.neigh_name = None + self.vm_handle = None + self.neigh_bgp = None + self.dut_bgp = None + self.log = [] + self.fails = dict() + self.fails['dut'] = set() + + def cmd(self, cmds): + process = subprocess.Popen(cmds, + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return_code = process.returncode + + return stdout, stderr, return_code + + def select_vm(self): + self.vm_list.sort() + # use the day of the month to select a VM from the list for the sad pass operation + vm_index = datetime.datetime.now().day % len(self.vm_list) + self.neigh_vm = self.vm_list.pop(vm_index) + + def get_neigh_name(self): + for key in self.vm_dut_map.keys(): + if self.vm_dut_map[key]['mgmt_addr'] == self.neigh_vm: 
+ self.neigh_name = key + break + + def down_neigh_port(self): + # extract ptf ports for the selected VM and mark them down + for port in self.vm_dut_map[self.neigh_name]['ptf_ports']: + self.portchannel_ports.remove(port) + + def vm_connect(self): + self.vm_handle = Arista(self.neigh_vm, None, self.test_args) + self.vm_handle.connect() + + def __del__(self): + self.vm_disconnect() + + def vm_disconnect(self): + self.vm_handle.disconnect() + + def setup(self): + self.select_vm() + self.get_neigh_name() + self.down_neigh_port() + self.vm_connect() + self.neigh_bgp, self.dut_bgp = self.vm_handle.get_bgp_info() + self.fails[self.neigh_vm] = set() + self.log.append('Neighbor AS: %s' % self.neigh_bgp['asn']) + self.log.append('BGP v4 neighbor: %s' % self.neigh_bgp['v4']) + self.log.append('BGP v6 neighbor: %s' % self.neigh_bgp['v6']) + self.log.append('DUT BGP v4: %s' % self.dut_bgp['v4']) + self.log.append('DUT BGP v6: %s' % self.dut_bgp['v6']) + + def retreive_test_info(self): + return self.vm_list, self.portchannel_ports, self.neigh_vm + + def retreive_logs(self): + return self.log, self.fails['dut'], self.fails[self.neigh_vm] + + +class SadOper(SadPath): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh): + super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) + self.dut_ssh = dut_ssh + self.dut_needed = None + + def populate_bgp_state(self): + if self.oper_type == 'neigh_bgp_down': + self.neigh_bgp['changed_state'] = 'down' + self.dut_bgp['changed_state'] = 'Active' + self.dut_needed = None + elif self.oper_type == 'dut_bgp_down': + self.neigh_bgp['changed_state'] = 'Active' + self.dut_bgp['changed_state'] = 'Idle' + self.dut_needed = self.dut_bgp + + def sad_setup(self, is_up=True): + self.log = [] + if not is_up: + self.setup() + self.populate_bgp_state() + self.log.append('BGP state change will be for %s' % self.neigh_vm) + if self.oper_type == 'neigh_bgp_down': + 
self.log.append('Changing state of AS %s to shut' % self.neigh_bgp['asn']) + self.vm_handle.change_bgp_neigh_state(self.neigh_bgp['asn'], is_up=is_up) + elif self.oper_type == 'dut_bgp_down': + self.change_bgp_dut_state(is_up=is_up) + time.sleep(30) + + def change_bgp_dut_state(self, is_up=True): + state = ['shutdown', 'startup'] + for key in self.neigh_bgp.keys(): + if key not in ['v4', 'v6']: + continue + + self.log.append('Changing state of BGP peer %s from DUT side to %s' % (self.neigh_bgp[key], state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config bgp %s neighbor %s' % (state[is_up], self.neigh_bgp[key])]) + if return_code != 0: + self.fails['dut'].add('State change not successful from DUT side for peer %s' % self.neigh_bgp[key]) + self.fails['dut'].add('Return code: %d' % return_code) + self.fails['dut'].add('Stderr: %s' % stderr) + + def verify_bgp_dut_state(self, state='Idle'): + bgp_state = {} + bgp_state['v4'] = bgp_state['v6'] = False + for key in self.neigh_bgp.keys(): + if key not in ['v4', 'v6']: + continue + self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgp[key], state)) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show ip bgp neighbor %s' % self.neigh_bgp[key]]) + if return_code == 0: + for line in stdout.split('\n'): + if 'BGP state' in line: + curr_state = re.findall('BGP state = (\w+)', line)[0] + bgp_state[key] = (curr_state == state) + break + else: + self.fails['dut'].add('Retreiving BGP info for peer %s from DUT side failed' % self.neigh_bgp[key]) + self.fails['dut'].add('Return code: %d' % return_code) + self.fails['dut'].add('Stderr: %s' % stderr) + return bgp_state + + def sad_bgp_verify(self): + self.log = [] + fails_vm, bgp_state = self.vm_handle.verify_bgp_neigh_state(dut=self.dut_needed, state=self.neigh_bgp['changed_state']) + self.fails[self.neigh_vm] |= fails_vm + if bgp_state['v4'] and 
bgp_state['v6']: + self.log.append('BGP state down as expected for %s' % self.neigh_vm) + else: + self.fails[self.neigh_vm].add('BGP state not down for %s' % self.neigh_vm) + bgp_state = self.verify_bgp_dut_state(state=self.dut_bgp['changed_state']) + if bgp_state['v4'] and bgp_state['v6']: + self.log.append('BGP state down as expected on DUT') + else: + self.fails['dut'].add('BGP state not down on DUT') diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 7a9973e11c7..73ab543de9b 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -20,6 +20,21 @@ - fail: msg="Please set vm_hosts variable with a list of VMs" when: vm_hosts is not defined + - name: Preboot-list initialization + set_fact: preboot_list={% if preboot_list is not defined %}[None]{% else %}{{ preboot_list }}{% endif %} + + - name: Preboot files initialization + set_fact: preboot_files={% if preboot_files is not defined %}None{% else %}{{ preboot_files }}{% endif %} + + - debug: + msg: "Preboot-list: {{ preboot_list }} Preboot-files: {{ preboot_files }}" + + - name: Set PTF test params + set_fact: + dut_mac: "{{ ansible_Ethernet0['macaddress'] }}" + vlan_ip_range: "{{ minigraph_vlan_interfaces[0]['subnet'] }}" + lo_v6_prefix: "{{ minigraph_lo_interfaces | map(attribute='addr') | ipv6 | first | ipsubnet(64) }}" + - name: Remove existing ip from ptf host script: roles/test/files/helpers/remove_ip.sh delegate_to: "{{ ptf_host }}" @@ -106,6 +121,22 @@ dest: /tmp/ports.json delegate_to: "{{ ptf_host }}" + - block: + + - name: Copy peer device info to ptf host + copy: + content: "{{ minigraph_devices | to_nice_json }}" + dest: /tmp/peer_dev_info.json + delegate_to: "{{ ptf_host }}" + + - name: Copy neighbor port info to ptf host + copy: + content: "{{ minigraph_neighbors | to_nice_json }}" + dest: /tmp/neigh_port_info.json + delegate_to: "{{ ptf_host }}" + + when: preboot_list|length > 1 + - 
debug: msg="Defined new sonic image url is {{ new_sonic_image }}" when: new_sonic_image is defined @@ -116,85 +147,13 @@ - include: advanced_reboot/reboot-image-handle.yml when: new_sonic_image is defined - - include: ptf_runner.yml - vars: - ptf_test_name: Advanced-reboot test - ptf_test_dir: ptftests - ptf_test_path: advanced-reboot.ReloadTest - ptf_platform: remote - ptf_platform_dir: ptftests - ptf_qlen: 1000 - ptf_test_params: - - verbose=False - - dut_username=\"{{ ansible_ssh_user }}\" - - dut_hostname=\"{{ ansible_host }}\" - - reboot_limit_in_seconds={{ reboot_limit }} - - reboot_type=\"{{ reboot_type }}\" - - portchannel_ports_file=\"/tmp/portchannel_interfaces.json\" - - vlan_ports_file=\"/tmp/vlan_interfaces.json\" - - ports_file=\"/tmp/ports.json\" - - dut_mac='{{ ansible_Ethernet0['macaddress'] }}' - - dut_vlan_ip='192.168.0.1' - - default_ip_range='192.168.0.0/16' - - vlan_ip_range=\"{{ minigraph_vlan_interfaces[0]['subnet'] }}\" - - lo_v6_prefix=\"{{ minigraph_lo_interfaces | map(attribute='addr') | ipv6 | first | ipsubnet(64) }}\" - - arista_vms=\"['{{ vm_hosts | list | join("','") }}']\" + - include: ptf_runner_reboot.yml + with_items: "{{ preboot_list }}" always: - - name: Copy test results from ptf to the local box /tmp/*-reboot.log - fetch: src='/tmp/{{reboot_type}}.log' dest='/tmp/' flat=true fail_on_missing=false - delegate_to: "{{ ptf_host }}" - - - name: Copy pcap files from ptf to the local box /tmp/ - fetch: src={{ item }} dest='/tmp/' flat=true fail_on_missing=false - delegate_to: "{{ ptf_host }}" - with_items: - - "/tmp/capture.pcap" - - "/tmp/capture_filtered.pcap" - - name: Remove existing ip from ptf host script: roles/test/files/helpers/remove_ip.sh delegate_to: "{{ ptf_host }}" - - name: Extract all syslog entries since the last reboot - extract_log: - directory: '/var/log' - file_prefix: 'syslog' - start_string: 'Linux version' - target_filename: '/tmp/syslog' - become: yes - - - name: Copy the exctracted syslog entries to the 
local machine - fetch: - src: '/tmp/syslog' - dest: '/tmp/' - flat: yes - - - name: Extract all sairedis.rec entries since the last reboot - extract_log: - directory: '/var/log/swss' - file_prefix: 'sairedis.rec' - start_string: 'recording on:' - target_filename: '/tmp/sairedis.rec' - - - name: Copy the exctracted sairedis.rec entries to the local machine - fetch: - src: '/tmp/sairedis.rec' - dest: '/tmp/' - flat: yes - - - name: Extract all swss.rec entries since the last reboot - extract_log: - directory: '/var/log/swss' - file_prefix: 'swss.rec' - start_string: 'recording started' - target_filename: '/tmp/swss.rec' - - - name: Copy the exctracted swss.rec entries to the local machine - fetch: - src: '/tmp/swss.rec' - dest: '/tmp/' - flat: yes - - name: make sure all handlers run meta: flush_handlers diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml new file mode 100644 index 00000000000..cfe55d66424 --- /dev/null +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -0,0 +1,104 @@ +- block: + - include: ptf_runner.yml + vars: + ptf_test_name: Advanced-reboot test + ptf_test_dir: ptftests + ptf_test_path: advanced-reboot.ReloadTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_qlen: 1000 + ptf_test_params: + - verbose=False + - dut_username=\"{{ ansible_ssh_user }}\" + - dut_hostname=\"{{ ansible_host }}\" + - reboot_limit_in_seconds={{ reboot_limit }} + - reboot_type=\"{{ reboot_type }}\" + - portchannel_ports_file=\"/tmp/portchannel_interfaces.json\" + - vlan_ports_file=\"/tmp/vlan_interfaces.json\" + - ports_file=\"/tmp/ports.json\" + - dut_mac='{{ dut_mac }}' + - dut_vlan_ip='192.168.0.1' + - default_ip_range='192.168.0.0/16' + - vlan_ip_range='{{ vlan_ip_range }}' + - lo_v6_prefix='{{ lo_v6_prefix }}' + - arista_vms=\"['{{ vm_hosts | list | join("','") }}']\" + - preboot_files='{{ preboot_files }}' + - preboot_oper='{{ item }}' + + always: + + - name: Set all the filename vars when 
there is no preboot type + set_fact: + reboot_log: '/tmp/{{reboot_type}}.log' + capture_pcap: '/tmp/capture.pcap' + filter_pcap: '/tmp/capture_filtered.pcap' + syslog_file: '/tmp/syslog' + sairedis_rec: '/tmp/sairedis.rec' + swss_rec: '/tmp/swss.rec' + when: not item or item == 'None' + + - name: Set all the filename vars when there is a preboot type + set_fact: + reboot_log: '/tmp/{{reboot_type}}-{{item}}.log' + capture_pcap: '/tmp/capture_{{item}}.pcap' + filter_pcap: '/tmp/capture_filtered_{{item}}.pcap' + syslog_file: '/tmp/syslog_{{item}}' + sairedis_rec: '/tmp/sairedis.rec.{{item}}' + swss_rec: '/tmp/swss.rec.{{item}}' + when: item and item != 'None' + + - name: Copy test results from ptf to the local box /tmp/*-reboot*.log + fetch: src="{{ reboot_log }}" dest='/tmp/' flat=true fail_on_missing=false + delegate_to: "{{ ptf_host }}" + + - name: Copy pcap files from ptf to the local box /tmp/ + fetch: src={{ item }} dest='/tmp/' flat=true fail_on_missing=false + delegate_to: "{{ ptf_host }}" + with_items: + - "{{ capture_pcap }}" + - "{{ filter_pcap }}" + + - name: Extract all syslog entries since the last reboot + extract_log: + directory: '/var/log' + file_prefix: 'syslog' + start_string: 'Linux version' + target_filename: "{{ syslog_file }}" + become: yes + + - name: Copy the exctracted syslog entries to the local machine + fetch: + src: "{{ syslog_file }}" + dest: '/tmp/' + flat: yes + + - name: Extract all sairedis.rec entries since the last reboot + extract_log: + directory: '/var/log/swss' + file_prefix: 'sairedis.rec' + start_string: 'recording on:' + target_filename: "{{ sairedis_rec }}" + + - name: Copy the exctracted sairedis.rec entries to the local machine + fetch: + src: "{{ sairedis_rec }}" + dest: '/tmp/' + flat: yes + + - name: Extract all swss.rec entries since the last reboot + extract_log: + directory: '/var/log/swss' + file_prefix: 'swss.rec' + start_string: 'recording started' + target_filename: "{{ swss_rec }}" + + - name: Copy the 
exctracted swss.rec entries to the local machine + fetch: + src: "{{ swss_rec }}" + dest: '/tmp/' + flat: yes + + +- name: Wait for the DUT to be ready for the next test + pause: seconds=420 + when: preboot_list|length > 1 diff --git a/ansible/roles/test/tasks/warm-reboot-sad.yml b/ansible/roles/test/tasks/warm-reboot-sad.yml new file mode 100644 index 00000000000..920cae835a7 --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-sad.yml @@ -0,0 +1,11 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +- name: Warm-reboot test + include: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: ['neigh_bgp_down', 'dut_bgp_down'] + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 33e2319075a..32986574095 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -84,6 +84,13 @@ testcases: ptf_host: vm_hosts: + warm-reboot-sad: + filename: warm-reboot-sad.yml + topologies: [t0, t0-64, t0-64-32, t0-116] + required_vars: + ptf_host: + vm_hosts: + fib: filename: simple-fib.yml topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] From 84f4a6f79858cd51295077d39cc88d02dfa8751b Mon Sep 17 00:00:00 2001 From: Pavlo Yadvichuk Date: Sat, 8 Jun 2019 05:36:42 +0300 Subject: [PATCH 032/218] Fix python crash in case data plane never stop on fast-reboot (#893) * Do not crash in case data plane never stop on fast-reboot --- ansible/roles/test/files/ptftests/advanced-reboot.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 2f11102a15a..2fc6a7edcb4 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -594,6 +594,8 @@ def runTest(self): 
no_routing_start = None no_routing_stop = None no_cp_replies = None + upper_replies = [] + routing_always = False self.ssh_jobs = [] for addr in self.ssh_targets: @@ -659,7 +661,8 @@ def runTest(self): self.log("Data plane was stopped, Waiting until it's up. Stop time: %s" % str(no_routing_start)) except TimeoutError: self.log("Data plane never stop") - no_routing_start = datetime.datetime.min + routing_always = True + upper_replies = [self.nr_vl_pkts] if no_routing_start is not None: no_routing_stop, _ = self.timeout(self.check_forwarding_resume, @@ -667,6 +670,7 @@ def runTest(self): "DUT hasn't started to work for %d seconds" % self.task_timeout) else: no_routing_stop = datetime.datetime.min + no_routing_start = datetime.datetime.min # Stop watching DUT self.watching = False @@ -778,7 +782,8 @@ def wait_for_ssh_threads(): if no_routing_stop: self.log("Downtime was %s" % str(no_routing_stop - no_routing_start)) - self.log("Reboot time was %s" % str(no_routing_stop - self.reboot_start)) + reboot_time = "0:00:00" if routing_always else str(no_routing_stop - self.reboot_start) + self.log("Reboot time was %s" % reboot_time) self.log("Expected downtime is less then %s" % self.limit) if self.reboot_type == 'fast-reboot' and no_cp_replies: From 84cd691555323cda1a971553debca1904bdfaa54 Mon Sep 17 00:00:00 2001 From: neethajohn <48968228+neethajohn@users.noreply.github.com> Date: Thu, 13 Jun 2019 09:14:28 -0700 Subject: [PATCH 033/218] [warm-reboot] Add preboot LAG sad path automation (#945) * preboot LAG sad path automation for neigh_lag_down and dut_lag_down scenarios --- .../test/files/ptftests/advanced-reboot.py | 4 +- ansible/roles/test/files/ptftests/arista.py | 32 ++++- ansible/roles/test/files/ptftests/sad_path.py | 117 ++++++++++++++++-- ansible/roles/test/tasks/warm-reboot-sad.yml | 2 +- 4 files changed, 139 insertions(+), 16 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py 
index 2fc6a7edcb4..2bc7cb777e3 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -311,7 +311,7 @@ def get_portchannel_info(self): for vm_key in self.vm_dut_map.keys(): if member in self.vm_dut_map[vm_key]['dut_ports']: self.vm_dut_map[vm_key]['dut_portchannel'] = key - self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel 1' + self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel1' break def get_neigh_port_info(self): @@ -732,7 +732,7 @@ def wait_for_ssh_threads(): if self.reboot_type == 'warm-reboot' and self.preboot_oper is not None: if self.pre_handle is not None: self.log("Postboot checks:") - log_info, fails_dut, fails_vm = self.pre_handle.verify() + log_info, fails_dut, fails_vm = self.pre_handle.verify(pre_check=False) self.fails[self.neigh_vm] |= fails_vm self.fails['dut'] |= fails_dut for log in log_info: diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index 3ddbf1bb178..a8aaa0fa738 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -358,7 +358,7 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"): data = '\n'.join(output.split('\r\n')[1:-1]) obj = json.loads(data) - if state != 'Active': + if state == 'down': if 'vrfs' in obj: # return True when obj['vrfs'] is empty which is the case when the bgp state is 'down' bgp_state[ver] = not obj['vrfs'] @@ -375,6 +375,36 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"): self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver) return self.fails, bgp_state + def change_neigh_lag_state(self, lag, is_up=True): + state = ['shut', 'no shut'] + self.do_cmd('configure') + is_match = re.match('(Port-Channel|Ethernet)\d+', lag) + if is_match: + output = self.do_cmd('interface %s' % lag) + if 'Invalid' not in output: + self.do_cmd(state[is_up]) + self.do_cmd('exit') + 
self.do_cmd('exit') + + def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): + lag_state = False + msg_prefix = ['Postboot', 'Preboot'] + is_match = re.match('(Port-Channel|Ethernet)\d+', lag) + if is_match: + output = self.do_cmd('show interfaces %s | json' % lag) + if 'Invalid' not in output: + data = '\n'.join(output.split('\r\n')[1:-1]) + obj = json.loads(data) + + if 'interfaces' in obj and lag in obj['interfaces']: + lag_state = (obj['interfaces'][lag]['interfaceStatus'] == state) + else: + self.fails.add('%s: Verify LAG %s: Object missing in output' % (msg_prefix[pre_check], lag)) + return self.fails, lag_state + + self.fails.add('%s: Invalid interface name' % msg_prefix[pre_check]) + return self.fails, lag_state + def check_gr_peer_status(self, output): # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 if not self.ipv4_gr_enabled: diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index ccae8ad0878..fec1f64fd41 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -20,12 +20,13 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh) def setup(self): - if 'bgp' in self.oper_type: - self.shandle.sad_setup(is_up=False) + self.shandle.sad_setup(is_up=False) return self.shandle.retreive_test_info(), self.shandle.retreive_logs() - def verify(self): + def verify(self, pre_check=True): self.shandle.sad_bgp_verify() + if 'lag' in self.oper_type: + self.shandle.sad_lag_verify(pre_check=pre_check) return self.shandle.retreive_logs() def revert(self): @@ -111,6 +112,9 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) 
self.dut_ssh = dut_ssh self.dut_needed = None + self.lag_members_down = None + self.neigh_lag_state = None + self.msg_prefix = ['Postboot', 'Preboot'] def populate_bgp_state(self): if self.oper_type == 'neigh_bgp_down': @@ -121,19 +125,42 @@ def populate_bgp_state(self): self.neigh_bgp['changed_state'] = 'Active' self.dut_bgp['changed_state'] = 'Idle' self.dut_needed = self.dut_bgp + elif self.oper_type == 'neigh_lag_down': + # on the DUT side, bgp states are different pre and post boot. hence passing multiple values + self.neigh_bgp['changed_state'] = 'Idle' + self.dut_bgp['changed_state'] = 'Connect,Active,Idle' + self.dut_needed = self.dut_bgp + elif self.oper_type == 'dut_lag_down': + self.neigh_bgp['changed_state'] = 'Idle' + self.dut_bgp['changed_state'] = 'Active,Connect,Idle' + self.dut_needed = self.dut_bgp def sad_setup(self, is_up=True): self.log = [] + if not is_up: self.setup() self.populate_bgp_state() - self.log.append('BGP state change will be for %s' % self.neigh_vm) - if self.oper_type == 'neigh_bgp_down': - self.log.append('Changing state of AS %s to shut' % self.neigh_bgp['asn']) - self.vm_handle.change_bgp_neigh_state(self.neigh_bgp['asn'], is_up=is_up) - elif self.oper_type == 'dut_bgp_down': - self.change_bgp_dut_state(is_up=is_up) - time.sleep(30) + if 'lag' in self.oper_type: + self.populate_lag_state() + + if 'bgp' in self.oper_type: + self.log.append('BGP state change will be for %s' % self.neigh_vm) + if self.oper_type == 'neigh_bgp_down': + self.log.append('Changing state of AS %s to shut' % self.neigh_bgp['asn']) + self.vm_handle.change_bgp_neigh_state(self.neigh_bgp['asn'], is_up=is_up) + elif self.oper_type == 'dut_bgp_down': + self.change_bgp_dut_state(is_up=is_up) + time.sleep(30) + elif 'lag' in self.oper_type: + self.log.append('LAG state change will be for %s' % self.neigh_vm) + if self.oper_type == 'neigh_lag_down': + self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_name]['neigh_portchannel']) + 
self.vm_handle.change_neigh_lag_state(self.vm_dut_map[self.neigh_name]['neigh_portchannel'], is_up=is_up) + elif self.oper_type == 'dut_lag_down': + self.change_dut_lag_state(is_up=is_up) + # wait for sometime for lag members state to sync + time.sleep(120) def change_bgp_dut_state(self, is_up=True): state = ['shutdown', 'startup'] @@ -149,18 +176,19 @@ def change_bgp_dut_state(self, is_up=True): self.fails['dut'].add('Stderr: %s' % stderr) def verify_bgp_dut_state(self, state='Idle'): + states = state.split(',') bgp_state = {} bgp_state['v4'] = bgp_state['v6'] = False for key in self.neigh_bgp.keys(): if key not in ['v4', 'v6']: continue - self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgp[key], state)) + self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgp[key], states)) stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show ip bgp neighbor %s' % self.neigh_bgp[key]]) if return_code == 0: for line in stdout.split('\n'): if 'BGP state' in line: curr_state = re.findall('BGP state = (\w+)', line)[0] - bgp_state[key] = (curr_state == state) + bgp_state[key] = (curr_state in states) break else: self.fails['dut'].add('Retreiving BGP info for peer %s from DUT side failed' % self.neigh_bgp[key]) @@ -181,3 +209,68 @@ def sad_bgp_verify(self): self.log.append('BGP state down as expected on DUT') else: self.fails['dut'].add('BGP state not down on DUT') + + def populate_lag_state(self): + if self.oper_type == 'neigh_lag_down': + self.neigh_lag_state = 'disabled' + self.lag_members_down = self.vm_dut_map[self.neigh_name]['dut_ports'] + elif self.oper_type == 'dut_lag_down': + self.lag_members_down = self.vm_dut_map[self.neigh_name]['dut_ports'] + self.neigh_lag_state = 'notconnect' + + def change_dut_lag_state(self, is_up=True): + state = ['shutdown', 'startup'] + dut_portchannel = self.vm_dut_map[self.neigh_name]['dut_portchannel'] + if not re.match('(PortChannel|Ethernet)\d+', 
dut_portchannel): return + self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)]) + if return_code != 0: + self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + else: + self.log.append('State change successful on DUT') + + def verify_dut_lag_member_state(self, lag_memb_output, pre_check=True): + success = True + for member in self.vm_dut_map[self.neigh_name]['dut_ports']: + if self.lag_members_down is not None and member in self.lag_members_down: + search_str = '%s(D)' % member + else: + search_str = '%s(S)' % member + + if lag_memb_output.find(search_str) != -1: + self.log.append('Lag member %s state as expected' % member) + else: + success = False + self.fails['dut'].add('%s: Lag member %s state not as expected' % (self.msg_prefix[pre_check], member)) + return success + + def verify_dut_lag_state(self, pre_check=True): + pat = re.compile(".*%s\s+LACP\(A\)\(Dw\)\s+(.*)" % self.vm_dut_map[self.neigh_name]['dut_portchannel']) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces portchannel']) + if return_code == 0: + for line in stdout.split('\n'): + if self.vm_dut_map[self.neigh_name]['dut_portchannel'] in line: + is_match = pat.match(line) + if is_match and self.verify_dut_lag_member_state(is_match.group(1), pre_check=pre_check): + self.log.append('Lag state is down as expected on the DUT') + self.log.append('Pattern check: %s' % line) + else: + self.fails['dut'].add('%s: Lag state is not down on the DUT' % self.msg_prefix[pre_check]) + self.fails['dut'].add('%s: Obtained: 
%s' % (self.msg_prefix[pre_check], line)) + break + else: + self.fails['dut'].add('%s: Retreiving LAG info from DUT side failed' % self.msg_prefix[pre_check]) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[pre_check], stderr)) + + def sad_lag_verify(self, pre_check=True): + fails_vm, lag_state = self.vm_handle.verify_neigh_lag_state(self.vm_dut_map[self.neigh_name]['neigh_portchannel'], state=self.neigh_lag_state, pre_check=pre_check) + self.fails[self.neigh_vm] |= fails_vm + if lag_state: + self.log.append('LAG state down as expected for %s' % self.neigh_vm) + else: + self.fails[self.neigh_vm].add('%s: LAG state not down for %s' % (self.msg_prefix[pre_check], self.neigh_vm)) + self.log.append('Verifying LAG state on the dut end') + self.verify_dut_lag_state(pre_check=pre_check) diff --git a/ansible/roles/test/tasks/warm-reboot-sad.yml b/ansible/roles/test/tasks/warm-reboot-sad.yml index 920cae835a7..6ace17b74e0 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad.yml +++ b/ansible/roles/test/tasks/warm-reboot-sad.yml @@ -7,5 +7,5 @@ include: advanced-reboot.yml vars: reboot_type: warm-reboot - preboot_list: ['neigh_bgp_down', 'dut_bgp_down'] + preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down'] preboot_files: "peer_dev_info,neigh_port_info" From 3f30920a8394ba7b602af247adf8df34f39743be Mon Sep 17 00:00:00 2001 From: Prince Sunny Date: Tue, 11 Jun 2019 00:58:35 -0700 Subject: [PATCH 034/218] Default to use jumbo frames for this test. 
MTU is configurable for any smaller values (#946) --- ansible/roles/test/files/ptftests/fib_test.py | 5 +++++ ansible/roles/test/tasks/simple-fib.yml | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/files/ptftests/fib_test.py b/ansible/roles/test/files/ptftests/fib_test.py index a52199d8e5b..a53e572772a 100644 --- a/ansible/roles/test/files/ptftests/fib_test.py +++ b/ansible/roles/test/files/ptftests/fib_test.py @@ -87,6 +87,7 @@ def setUp(self): self.dataplane = ptf.dataplane_instance self.fib = fib.Fib(self.test_params['fib_info']) self.router_mac = self.test_params['router_mac'] + self.pktlen = self.test_params['testbed_mtu'] self.test_ipv4 = self.test_params.get('ipv4', True) self.test_ipv6 = self.test_params.get('ipv6', True) @@ -172,6 +173,7 @@ def check_ipv4_route(self, src_port, dst_ip_addr, dst_port_list): src_mac = self.dataplane.get_mac(0, 0) pkt = simple_tcp_packet( + pktlen=self.pktlen, eth_dst=self.router_mac, eth_src=src_mac, ip_src=ip_src, @@ -180,6 +182,7 @@ def check_ipv4_route(self, src_port, dst_ip_addr, dst_port_list): tcp_dport=dport, ip_ttl=64) exp_pkt = simple_tcp_packet( + self.pktlen, eth_src=self.router_mac, ip_src=ip_src, ip_dst=ip_dst, @@ -210,6 +213,7 @@ def check_ipv6_route(self, src_port, dst_ip_addr, dst_port_list): src_mac = self.dataplane.get_mac(0, 0) pkt = simple_tcpv6_packet( + pktlen=self.pktlen, eth_dst=self.router_mac, eth_src=src_mac, ipv6_dst=ip_dst, @@ -218,6 +222,7 @@ def check_ipv6_route(self, src_port, dst_ip_addr, dst_port_list): tcp_dport=dport, ipv6_hlim=64) exp_pkt = simple_tcpv6_packet( + pktlen=self.pktlen, eth_src=self.router_mac, ipv6_dst=ip_dst, ipv6_src=ip_src, diff --git a/ansible/roles/test/tasks/simple-fib.yml b/ansible/roles/test/tasks/simple-fib.yml index e21b16a71fc..889538da3c3 100644 --- a/ansible/roles/test/tasks/simple-fib.yml +++ b/ansible/roles/test/tasks/simple-fib.yml @@ -6,6 +6,9 @@ - debug : msg="Start FIB Test" +- set_fact: mtu=9114 + when: mtu is 
not defined + - name: "Start PTF runner" include: ptf_runner.yml vars: @@ -20,4 +23,5 @@ - fib_info='/root/fib_info.txt' - ipv4={{ipv4}} - ipv6={{ipv6}} - ptf_extra_options: "--relax --debug info --log-file /tmp/fib_test.FibTest.ipv4.{{ipv4}}.ipv6.{{ipv6}}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + - testbed_mtu={{mtu}} + ptf_extra_options: "--relax --debug info --log-file /tmp/fib_test.FibTest.ipv4.{{ipv4}}.ipv6.{{ipv6}}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log --socket-recv-size 16384" From c5846e11c7898aefa6e03cc9fddca8b882cbaf45 Mon Sep 17 00:00:00 2001 From: Prince Sunny Date: Wed, 19 Jun 2019 13:43:43 -0700 Subject: [PATCH 035/218] Fix testbed_mtu for tasks that invoke fib_test (#964) * Fix testbed_mtu for tasks that invoke fib_test * Set socket buffer size to 16k --- ansible/roles/test/tasks/bgp_speaker.yml | 3 ++- ansible/roles/test/tasks/warm-reboot-fib.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/bgp_speaker.yml b/ansible/roles/test/tasks/bgp_speaker.yml index 2bb8e087fd0..86cf8581740 100644 --- a/ansible/roles/test/tasks/bgp_speaker.yml +++ b/ansible/roles/test/tasks/bgp_speaker.yml @@ -198,7 +198,8 @@ - fib_info='/root/bgp_speaker_route.txt' - ipv4={{ ipv4|default(false) }} - ipv6={{ ipv6|default(false) }} - ptf_extra_options: "--relax --debug info --log-file /tmp/bgp_speaker_test.FibTest.log" + - testbed_mtu={{ mtu|default(9114) }} + ptf_extra_options: "--relax --debug info --log-file /tmp/bgp_speaker_test.FibTest.log --socket-recv-size 16384" always: - name: Kill exabgp instances shell: pkill exabgp diff --git a/ansible/roles/test/tasks/warm-reboot-fib.yml b/ansible/roles/test/tasks/warm-reboot-fib.yml index 3f1a2c5ac15..8bb93dec474 100644 --- a/ansible/roles/test/tasks/warm-reboot-fib.yml +++ b/ansible/roles/test/tasks/warm-reboot-fib.yml @@ -19,7 +19,8 @@ - fib_info='/root/fib_info.txt' - ipv4={{ipv4}} - ipv6={{ipv6}} - ptf_extra_options: "--relax --debug info --log-file 
/tmp/fib_test.FibTest.ipv4.{{ipv4}}.ipv6.{{ipv6}}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + - testbed_mtu={{ mtu|default(9114) }} + ptf_extra_options: "--relax --debug info --log-file /tmp/fib_test.FibTest.ipv4.{{ipv4}}.ipv6.{{ipv6}}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log --socket-recv-size 16384" - name: "Show ptf params PTF Test - {{ ptf_test_name }}" debug: msg="ptf --test-dir {{ ptf_test_dir }} {{ ptf_test_path }} {% if ptf_qlen is defined %} --qlen={{ ptf_qlen }} {% endif %} {% if ptf_platform_dir is defined %} --platform-dir {{ ptf_platform_dir }} {% endif %} --platform {{ ptf_platform }} {% if ptf_test_params is defined %} -t \"{{ ptf_test_params | default([]) | join(';') }}\" {% endif %} {{ ptf_extra_options | default(\"\")}} --disable-vxlan --disable-geneve --disable-erspan --disable-mpls --disable-nvgre 2>&1" From af43f2252922e376f7d1c462c34d921adcbab3b6 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Fri, 21 Jun 2019 18:15:28 -0700 Subject: [PATCH 036/218] [fast/warm reboot] improve fast/warm reboot handling code (#973) - Improve the data test warm up code: Let the data plane IO stablize for 30 seconds before testing. We observed ptf instability causing the test to fail. - Remove config_db.json when fast-reboot into a new image. We want the new image to reload minigraph in this case. 
Signed-off-by: Ying Xie --- .../test/files/ptftests/advanced-reboot.py | 17 +++++++++++++---- .../advanced_reboot/reboot-image-handle.yml | 4 ++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 2bc7cb777e3..9e05c0cf09c 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -140,8 +140,8 @@ def __init__(self): self.check_param('lo_v6_prefix', 'fc00:1::/64', required=False) self.check_param('arista_vms', [], required=True) self.check_param('min_bgp_gr_timeout', 15, required=False) - self.check_param('warm_up_timeout_secs', 180, required=False) - self.check_param('dut_stabilize_secs', 20, required=False) + self.check_param('warm_up_timeout_secs', 300, required=False) + self.check_param('dut_stabilize_secs', 30, required=False) self.check_param('preboot_files', None, required = False) self.check_param('preboot_oper', None, required = False) if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None': @@ -1109,14 +1109,23 @@ def wait_dut_to_warm_up(self): warm_up_timeout_secs = int(self.test_params['warm_up_timeout_secs']) start_time = datetime.datetime.now() + up_time = None # First wait until DUT data/control planes are up while True: dataplane = self.asic_state.get() ctrlplane = self.cpu_state.get() elapsed = (datetime.datetime.now() - start_time).total_seconds() - if dataplane == 'up' and ctrlplane == 'up' and elapsed > dut_stabilize_secs: - break; + if dataplane == 'up' and ctrlplane == 'up': + if not up_time: + up_time = datetime.datetime.now() + up_secs = (datetime.datetime.now() - up_time).total_seconds() + if up_secs > dut_stabilize_secs: + break; + else: + # reset up_time + up_time = None + if elapsed > warm_up_timeout_secs: raise Exception("Control plane didn't come up within warm up timeout") time.sleep(1) diff --git 
a/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml b/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml index ed5f5d8a99c..62fd8334c3d 100755 --- a/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml +++ b/ansible/roles/test/tasks/advanced_reboot/reboot-image-handle.yml @@ -47,3 +47,7 @@ - name: Installing new SONiC image shell: sonic_installer install -y {{ new_image_location }} become: true + + - name: Remove config_db.json so the new image will reload minigraph + file: path=/host/old_config/config_db.json state=absent + become: true From 025376acb8e33f8a32434a83ceae9a404b92d37a Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Wed, 26 Jun 2019 16:05:33 -0700 Subject: [PATCH 037/218] [fdb_mac_expire.yml]: FDB MAC Expire test case. (#785) * [fdb_mac_expire.yml]: FDB MAC Expire test case. [fdb_mac_expire_test.py]: PTF helper to add Mac in L2 table. [fdb.yml]: include fdb_mac_expire.yml. This test case verifies that MAC expires within 10 mins if traffic is not flowing using it. Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: FDB MAC Expire test case. [fdb_mac_expire_test.py]: PTF helper to add Mac in L2 table. [testcases.yml]: include fdb_mac_expire.yml. This test case verifies that MAC expires within 10 mins if traffic is not flowing using it. Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: Incorporate swssconfig step to set fdb_aging_timer in fdb_mac_expire.yml Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: minor changes in logs Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: minor log changes to show time correctly. Example: "MAC Entires are Cleared within 100 secs." instead of "MAC Entires are Cleared within 2*50 secs." Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: Address review comments related to sonic-clear, -it option and block-always. Signed-off-by: Praveen Chaudhary * [fdb_mac_expire.yml]: Change "sonic-clear fdb all" to "Clear FDB table". 
Signed-off-by: Praveen Chaudhary --- .../files/ptftests/fdb_mac_expire_test.py | 37 ++++++ ansible/roles/test/tasks/fdb_mac_expire.yml | 115 ++++++++++++++++++ ansible/roles/test/vars/testcases.yml | 8 ++ 3 files changed, 160 insertions(+) create mode 100644 ansible/roles/test/files/ptftests/fdb_mac_expire_test.py create mode 100644 ansible/roles/test/tasks/fdb_mac_expire.yml diff --git a/ansible/roles/test/files/ptftests/fdb_mac_expire_test.py b/ansible/roles/test/files/ptftests/fdb_mac_expire_test.py new file mode 100644 index 00000000000..7bdbf42eb69 --- /dev/null +++ b/ansible/roles/test/files/ptftests/fdb_mac_expire_test.py @@ -0,0 +1,37 @@ +import fdb + +import ptf +import ptf.packet as scapy +import ptf.dataplane as dataplane + +from ptf import config +from ptf.base_tests import BaseTest +from ptf.testutils import * + +class FdbMacExpireTest(BaseTest): + def __init__(self): + BaseTest.__init__(self) + self.test_params = test_params_get() + #-------------------------------------------------------------------------- + def setUp(self): + self.dataplane = ptf.dataplane_instance + self.router_mac = self.test_params['router_mac'] + self.dummy_mac_prefix = self.test_params['dummy_mac_prefix'] + self.fdb_info = self.test_params['fdb_info'] + #-------------------------------------------------------------------------- + def populateFdb(self): + self.fdb = fdb.Fdb(self.fdb_info) + vlan_table = self.fdb.get_vlan_table() + for vlan in vlan_table: + for member in vlan_table[vlan]: + mac = self.dummy_mac_prefix + ":" + "{:02X}".format(member) + # Send a packet to switch to populate the layer 2 table + pkt = simple_eth_packet(eth_dst=self.router_mac, + eth_src=mac, + eth_type=0x1234) + send(self, member, pkt) + #-------------------------------------------------------------------------- + def runTest(self): + self.populateFdb() + return + #-------------------------------------------------------------------------- diff --git a/ansible/roles/test/tasks/fdb_mac_expire.yml 
b/ansible/roles/test/tasks/fdb_mac_expire.yml new file mode 100644 index 00000000000..56d3a070e67 --- /dev/null +++ b/ansible/roles/test/tasks/fdb_mac_expire.yml @@ -0,0 +1,115 @@ +- fail: msg="testbed_type is not defined" + when: testbed_type is not defined + +- fail: msg="testbed_type {{test_type}} is invalid" + when: testbed_type not in ['t0', 't0-64', 't0-64-32', 't0-116'] + +- name: set fdb_aging_time to default if no user input + set_fact: + fdb_aging_time: 60 + when: fdb_aging_time is not defined + +- include_vars: "vars/topo_{{testbed_type}}.yml" + +- name: Gather minigraph facts about the device + minigraph_facts: host={{inventory_hostname}} + +- name: Copy tests to PTF + copy: src=roles/test/files/ptftests dest=/root + delegate_to: "{{ptf_host}}" + +- name: Copy FDB information file to PTF + template: src=roles/test/templates/fdb.j2 dest=/root/fdb_info.txt + delegate_to: "{{ ptf_host }}" + +- name: Clear FDB table + shell: sonic-clear fdb all + + # Change the config, populate fdb and observe expire time +- block: + - name: copy current switch.json from docker to host + shell: docker cp swss:/etc/swss/config.d/switch.json . 
+ + - name: set fdb value to "{{fdb_aging_time}}" + replace: + dest: switch.json + regexp: '"fdb_aging_time": ".*"' + replace: '"fdb_aging_time": "{{fdb_aging_time}}"' + become: true + + - name: copy current switch.json from host to docker + shell: docker cp switch.json swss:/etc/swss/config.d/switch.json + + - name: run swssconfig switch.json command in container swss + shell: docker exec swss bash -c "swssconfig /etc/swss/config.d/switch.json" + + - name: set dummy mac prefix to look for in mac table + set_fact: dummy_mac_prefix="00:11:22:33:44" + + - name: check entries in mac table before adding dummy mac + shell: show mac | grep {{dummy_mac_prefix}} | wc -l + register: show_mac_output + failed_when: "show_mac_output.stdout|int > 0" + + - debug: msg="{{show_mac_output.stdout}}" + + - name: "Start PTF runner" + include: ptf_runner.yml + vars: + ptf_test_name: FDB Mac Expire test + ptf_test_dir: ptftests + ptf_test_path: fdb_mac_expire_test.FdbMacExpireTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_Ethernet0['macaddress']}}' + - fdb_info='/root/fdb_info.txt' + - dummy_mac_prefix='{{dummy_mac_prefix}}' + ptf_extra_options: "--relax --debug info --log-file /tmp/fdb_mac_expire_test.FdbMacExpireTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + - name: check entries in mac table after adding dummy mac + shell: show mac | grep {{dummy_mac_prefix}} | wc -l + register: show_mac_output + failed_when: "show_mac_output.stdout|int == 0" + + - debug: msg="{{show_mac_output}}" + + - name: wait for "{{fdb_aging_time}}" secs + pause: seconds="{{fdb_aging_time}}" + + - name: check entries in mac table after wait + shell: show mac | grep {{dummy_mac_prefix}} | wc -l + register: show_mac_after_wait + + - debug: msg="{{show_mac_after_wait}}" + + - name: set extra wait time period + set_fact: + extra_retries: "{{fdb_aging_time|int / 15 + 1}}" + + - debug: msg="{{extra_retries}}" + + # 
wait in slot of 15 secs to find when MAC expires + - block: + - name: check in mac table after "{{fdb_aging_time}}" secs to find exact time + shell: show mac | grep {{dummy_mac_prefix}} | wc -l + register: show_mac_after_more_wait + until: "show_mac_after_more_wait.stdout|int == 0" + retries: "{{extra_retries|int}}" + delay: 15 + + - fail: + msg: "MAC Entires are not cleaned even after {{2*fdb_aging_time|int}} secs" + when: "show_mac_after_more_wait.stdout|int > 0" + + - debug: msg="MAC Entires are Cleared within {{2*fdb_aging_time|int}} secs." + when: "show_mac_after_wait|int > 0" + + - debug: msg="MAC Entires are Cleared {{fdb_aging_time}} secs." + when: "show_mac_after_wait|int == 0" + + always: + - name: Clear FDB table + shell: sonic-clear fdb all + diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 32986574095..4e470062860 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -112,6 +112,14 @@ testcases: ptf_host: testbed_type: + fdb_mac_expire: + filename: fdb_mac_expire.yml + topologies: [t0, t0-64, t0-64-32, t0-116] + required_vars: + fdb_aging_time: + ptf_host: + testbed_type: + dir_bcast: filename: dir_bcast.yml topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] From e7dd7cc6b74041d30ad1f2c0c9b511462a14d95f Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 27 Jun 2019 10:28:46 -0700 Subject: [PATCH 038/218] [fast reboot] allow test to replace fast-reboot script on the DUT before rebooting (#975) - fast-reboot script is an adapted version from 201811 branch. The change is around syncd stop: in 201803 branch, if it is Broadcom platform, request syncd to perform cold shutdown. - Mellanox 201803 branch has a vlan FDB issue causing all vlan IO to flood. Add a knob allow_vlan_flooding to ignore this symptom and continue with fast-reboot. 
Signed-off-by: Ying Xie --- .../test/files/ptftests/advanced-reboot.py | 5 + ansible/roles/test/files/reboot/fast-reboot | 484 ++++++++++++++++++ ansible/roles/test/tasks/advanced-reboot.yml | 13 +- .../roles/test/tasks/ptf_runner_reboot.yml | 1 + 4 files changed, 501 insertions(+), 2 deletions(-) create mode 100755 ansible/roles/test/files/reboot/fast-reboot diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 9e05c0cf09c..71943b390d0 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -144,6 +144,7 @@ def __init__(self): self.check_param('dut_stabilize_secs', 30, required=False) self.check_param('preboot_files', None, required = False) self.check_param('preboot_oper', None, required = False) + self.check_param('allow_vlan_flooding', False, required = False) if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None': self.test_params['preboot_oper'] = None @@ -191,6 +192,8 @@ def __init__(self): # second is the fast send_in_background self.dataplane_io_lock = threading.Lock() + self.allow_vlan_flooding = bool(self.test_params['allow_vlan_flooding']) + return def read_json(self, name): @@ -1139,6 +1142,8 @@ def wait_dut_to_warm_up(self): if not self.asic_state.is_flooding() and elapsed > dut_stabilize_secs: break if elapsed > warm_up_timeout_secs: + if self.allow_vlan_flooding: + break raise Exception("Data plane didn't stop flooding within warm up timeout") time.sleep(1) diff --git a/ansible/roles/test/files/reboot/fast-reboot b/ansible/roles/test/files/reboot/fast-reboot new file mode 100755 index 00000000000..d2383f35dac --- /dev/null +++ b/ansible/roles/test/files/reboot/fast-reboot @@ -0,0 +1,484 @@ +#!/bin/bash -e + +REBOOT_USER=$(logname) +REBOOT_TIME=$(date) +REBOOT_CAUSE_FILE="/host/reboot-cause/reboot-cause.txt" +WARM_DIR=/host/warmboot +REDIS_FILE=dump.rdb 
+REBOOT_SCRIPT_NAME=$(basename $0) +REBOOT_TYPE="${REBOOT_SCRIPT_NAME}" +VERBOSE=no +FORCE=no +REBOOT_METHOD="/sbin/kexec -e" +ASSISTANT_IP_LIST="" +ASSISTANT_SCRIPT="/usr/bin/neighbor_advertiser" + +# Require 100M available on the hard drive for warm reboot temp files, +# Size is in 1K blocks: +MIN_HD_SPACE_NEEDED=100000 + +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_NOT_SUPPORTED=2 +EXIT_FILE_SYSTEM_FULL=3 +EXIT_NEXT_IMAGE_NOT_EXISTS=4 +EXIT_ORCHAGENT_SHUTDOWN=10 +EXIT_SYNCD_SHUTDOWN=11 +EXIT_FAST_REBOOT_DUMP_FAILURE=12 + +function error() +{ + echo $@ >&2 +} + +function debug() +{ + if [[ x"${VERBOSE}" == x"yes" ]]; then + echo `date` $@ + fi + logger "$@" +} + +function showHelpAndExit() +{ + echo "Usage: ${REBOOT_SCRIPT_NAME} [options]" + echo " -h,-? : get this help" + echo " -v : turn on verbose" + echo " -f : force execution" + echo " -r : reboot with /sbin/reboot" + echo " -k : reboot with /sbin/kexec -e [default]" + echo " -x : execute script with -x flag" + echo " -c : specify control plane assistant IP list" + + exit "${EXIT_SUCCESS}" +} + +function parseOptions() +{ + while getopts "vfh?rkxc:" opt; do + case ${opt} in + h|\? ) + showHelpAndExit + ;; + v ) + VERBOSE=yes + ;; + f ) + FORCE=yes + ;; + r ) + REBOOT_METHOD="/sbin/reboot" + ;; + k ) + REBOOT_METHOD="/sbin/kexec -e" + ;; + x ) + set -x + ;; + c ) + ASSISTANT_IP_LIST=${OPTARG} + ;; + esac + done +} + +function clear_fast_boot() +{ + debug "${REBOOT_TYPE} failure ($?) cleanup ..." + + /sbin/kexec -u || /bin/true + + teardown_control_plane_assistant +} + +function clear_warm_boot() +{ + clear_fast_boot + + result=`timeout 10s config warm_restart disable; if [[ $? 
== 124 ]]; then echo timeout; else echo "code ($?)"; fi` || /bin/true + debug "Cancel warm-reboot: ${result}" + + TIMESTAMP=`date +%Y%m%d-%H%M%S` + if [[ -f ${WARM_DIR}/${REDIS_FILE} ]]; then + mv -f ${WARM_DIR}/${REDIS_FILE} ${WARM_DIR}/${REDIS_FILE}.${TIMESTAMP} || /bin/true + fi +} + +function init_warm_reboot_states() +{ + # If the current running instanace was booted up with warm reboot. Then + # the current DB contents will likely mark warm reboot is done. + # Clear these states so that the next boot up image won't get confused. + if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + redis-cli -n 6 eval " + for _, key in ipairs(redis.call('keys', 'WARM_RESTART_TABLE|*')) do + redis.call('hdel', key, 'state') + end + " 0 >/dev/null + fi +} + +function initialize_pre_shutdown() +{ + debug "Initialize pre-shutdown ..." + TABLE="WARM_RESTART_TABLE|warm-shutdown" + RESTORE_COUNT=`/usr/bin/redis-cli -n 6 hget "${TABLE}" restore_count` + if [[ -z "$RESTORE_COUNT" ]]; then + /usr/bin/redis-cli -n 6 hset "${TABLE}" "restore_count" "0" > /dev/null + fi + /usr/bin/redis-cli -n 6 hset "${TABLE}" "state" "requesting" > /dev/null +} + +function request_pre_shutdown() +{ + debug "Requesting pre-shutdown ..." + /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --pre &> /dev/null || { + error "Failed to request pre-shutdown" + exit "${EXIT_SYNCD_SHUTDOWN}" + } +} + +function wait_for_pre_shutdown_complete_or_fail() +{ + debug "Waiting for pre-shutdown ..." + TABLE="WARM_RESTART_TABLE|warm-shutdown" + STATE="requesting" + declare -i waitcount + declare -i retrycount + waitcount=0 + retrycount=0 + # Wait up to 60 seconds for pre-shutdown to complete + while [[ ${waitcount} -lt 600 ]]; do + # timeout doesn't work with -i option of "docker exec". Therefore we have + # to invoke docker exec directly below. + STATE=`timeout 5s docker exec database redis-cli -n 6 hget "${TABLE}" state; if [[ $? 
== 124 ]]; then echo "timed out"; fi` + + if [[ x"${STATE}" == x"timed out" ]]; then + waitcount+=50 + retrycount+=1 + debug "Timed out getting pre-shutdown state (${waitcount}) retry count ${retrycount} ..." + if [[ retrycount -gt 2 ]]; then + break + fi + elif [[ x"${STATE}" != x"requesting" ]]; then + break + else + sleep 0.1 + waitcount+=1 + fi + done + + if [[ x"${STATE}" != x"pre-shutdown-succeeded" ]]; then + debug "Syncd pre-shutdown failed: ${STATE} ..." + exit "${EXIT_SYNCD_SHUTDOWN}" + fi + debug "Pre-shutdown succeeded ..." +} + +function backup_database() +{ + debug "Backing up database ..." + # Dump redis content to a file 'dump.rdb' in warmboot directory + mkdir -p $WARM_DIR + # Delete keys in stateDB except FDB_TABLE|* and WARM_RESTA* + redis-cli -n 6 eval " + for _, k in ipairs(redis.call('keys', '*')) do + if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') \ + and not string.match(k, 'WARM_RESTART_ENABLE_TABLE|') then + redis.call('del', k) + end + end + " 0 > /dev/null + redis-cli save > /dev/null + docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR + docker exec -i database rm /var/lib/redis/$REDIS_FILE +} + +function setup_control_plane_assistant() +{ + if [[ -n "${ASSISTANT_IP_LIST}" && -x ${ASSISTANT_SCRIPT} ]]; then + debug "Setting up control plane assistant: ${ASSISTANT_IP_LIST} ..." + ${ASSISTANT_SCRIPT} -s ${ASSISTANT_IP_LIST} -m set + fi +} + +function teardown_control_plane_assistant() +{ + if [[ -n "${ASSISTANT_IP_LIST}" && -x ${ASSISTANT_SCRIPT} ]]; then + debug "Tearing down control plane assistant: ${ASSISTANT_IP_LIST} ..." 
+ ${ASSISTANT_SCRIPT} -s ${ASSISTANT_IP_LIST} -m reset + fi +} + +function setup_reboot_variables() +{ + # Kernel and initrd image + CURRENT_SONIC_IMAGE=$(sonic_installer list | grep "Current: " | cut -d ' ' -f 2) + NEXT_SONIC_IMAGE=$(sonic_installer list | grep "Next: " | cut -d ' ' -f 2) + IMAGE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}" + if grep -q aboot_platform= /host/machine.conf; then + KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)" + BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" + elif grep -q onie_platform= /host/machine.conf; then + KERNEL_OPTIONS=$(cat /host/grub/grub.cfg | sed "/$NEXT_SONIC_IMAGE'/,/}/"'!'"g" | grep linux) + KERNEL_IMAGE="/host$(echo $KERNEL_OPTIONS | cut -d ' ' -f 2)" + BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" + else + error "Unknown bootloader. ${REBOOT_TYPE} is not supported." + exit "${EXIT_NOT_SUPPORTED}" + fi + INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g') +} + +function reboot_pre_check() +{ + # Make sure that the file system is normal: read-write able + filename="/host/test-`date +%Y%m%d-%H%M%S`" + if [[ ! -f ${filename} ]]; then + touch ${filename} + fi + rm ${filename} + + # Make sure /host has enough space for warm reboot temp files + avail=$(df -k /host | tail -1 | awk '{ print $4 }') + if [[ ${avail} -lt ${MIN_HD_SPACE_NEEDED} ]]; then + debug "/host has ${avail}K bytes available, not enough for warm reboot." + exit ${EXIT_FILE_SYSTEM_FULL} + fi + + # Make sure that the next image exists + if [[ ! -d ${IMAGE_PATH} ]]; then + debug "Next image ${NEXT_SONIC_IMAGE} doesn't exist ..." 
+ exit ${EXIT_NEXT_IMAGE_NOT_EXISTS} + fi +} + +function unload_kernel() +{ + # Unload the previously loaded kernel if any loaded + if [[ "$(cat /sys/kernel/kexec_loaded)" -eq 1 ]]; then + /sbin/kexec -u + fi +} + +# main starts here +parseOptions $@ + +# Check root privileges +if [[ "$EUID" -ne 0 ]] +then + echo "This command must be run as root" >&2 + exit "${EXIT_FAILURE}" +fi + +sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type) + +# Check reboot type supported +BOOT_TYPE_ARG="cold" +case "$REBOOT_TYPE" in + "fast-reboot") + BOOT_TYPE_ARG=$REBOOT_TYPE + trap clear_fast_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM + ;; + "warm-reboot") + if [[ "$sonic_asic_type" == "mellanox" ]]; then + REBOOT_TYPE="fastfast-reboot" + BOOT_TYPE_ARG="fastfast" + # source mlnx-ffb.sh file with + # functions to check ISSU upgrade possibility + source mlnx-ffb.sh + else + BOOT_TYPE_ARG="warm" + fi + trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM + config warm_restart enable system + ;; + *) + error "Not supported reboot type: $REBOOT_TYPE" + exit "${EXIT_NOT_SUPPORTED}" + ;; +esac + +# Stopping all SLB neighbors if they're presented +PASSIVE_BGP_NEIGHBORS=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "BGP_PEER_RANGE | list") +case "$PASSIVE_BGP_NEIGHBORS" in + *BGPSLBPassive*) + ASN=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "DEVICE_METADATA['localhost']['bgp_asn']") + SLBNAME=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "BGP_PEER_RANGE['BGPSLBPassive']['name']") + vtysh -e "configure terminal" -e "router bgp ${ASN}" -e "neighbor ${SLBNAME} shutdown" + sleep 1 + ;; + *) + ;; +esac + +unload_kernel + +setup_reboot_variables + +reboot_pre_check + +# Install new FW for mellanox platforms before control plane goes down +# So on boot switch will not spend time to upgrade FW increasing the CP downtime +if [[ "$sonic_asic_type" == "mellanox" ]]; then + MLNX_EXIT_SUCCESS=0 + MLNX_EXIT_FW_ERROR=100 + MLNX_EXIT_FFB_FAILURE=101 + + 
MLNX_FW_UPGRADE_SCRIPT="/usr/bin/mlnx-fw-upgrade.sh" + + + if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + check_ffb || { + error "Warm reboot is not supported" + exit "${MLNX_EXIT_FFB_FAILURE}" + } + fi + + debug "Prepare MLNX ASIC to ${REBOOT_TYPE}: install new FW if required" + + ${MLNX_FW_UPGRADE_SCRIPT} --upgrade + MLNX_EXIT_CODE="$?" + if [[ "${MLNX_EXIT_CODE}" != "${MLNX_EXIT_SUCCESS}" ]]; then + error "Failed to burn MLNX FW: errno=${MLNX_EXIT_CODE}" + exit "${MLNX_EXIT_FW_ERROR}" + fi +fi + +# Load kernel into the memory +/sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS" + +if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then + # Dump the ARP and FDB tables to files also as default routes for both IPv4 and IPv6 + # into /host/fast-reboot + mkdir -p /host/fast-reboot + FAST_REBOOT_DUMP_RC=0 + /usr/bin/fast-reboot-dump.py -t /host/fast-reboot || FAST_REBOOT_DUMP_RC=$? + if [[ FAST_REBOOT_DUMP_RC -ne 0 ]]; then + error "Failed to run fast-reboot-dump.py. Exit code: $FAST_REBOOT_DUMP_RC" + unload_kernel + exit "${EXIT_FAST_REBOOT_DUMP_FAILURE}" + fi +fi + +init_warm_reboot_states + +setup_control_plane_assistant + +if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + # Freeze orchagent for warm restart + # Ask orchagent_restart_check to try freeze 5 times with interval of 2 seconds, + # it is possible that the orchagent is in transient state and no opportunity to be freezed + # Note: assume that 2*5 seconds is enough for orchagent to process the request and respone freeze or not + debug "Pausing orchagent ..." + docker exec -i swss /usr/bin/orchagent_restart_check -w 2000 -r 5 > /dev/null || RESTARTCHECK_RC=$? + if [[ RESTARTCHECK_RC -ne 0 ]]; then + error "RESTARTCHECK failed" + if [[ x"${FORCE}" == x"yes" ]]; then + debug "Ignoring orchagent pausing failure ..." + else + exit "${EXIT_ORCHAGENT_SHUTDOWN}" + fi + fi +fi + +# Kill bgpd to start the bgp graceful restart procedure +debug "Stopping bgp ..." 
+docker exec -i bgp pkill -9 zebra +docker exec -i bgp pkill -9 bgpd || [ $? == 1 ] +debug "Stopped bgp ..." + +# Kill lldp, otherwise it sends informotion about reboot +docker kill lldp > /dev/null + +if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then + # Kill teamd, otherwise it gets down all LAGs + # Note: teamd must be killed before syncd, because it will send the last packet through CPU port + # TODO: stop teamd gracefully to allow teamd to send last valid update to be sure we'll have 90 seconds reboot time + docker kill teamd > /dev/null +fi + +# Kill swss dockers +docker kill swss > /dev/null + +# Pre-shutdown syncd +if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + initialize_pre_shutdown + + request_pre_shutdown + + wait_for_pre_shutdown_complete_or_fail + + # Warm reboot: dump state to host disk + if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + redis-cli -n 1 FLUSHDB > /dev/null + redis-cli -n 2 FLUSHDB > /dev/null + redis-cli -n 5 FLUSHDB > /dev/null + fi + + # TODO: backup_database preserves FDB_TABLE + # need to cleanup as well for fastfast boot case + backup_database +fi + +# Stop teamd gracefully +if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then + debug "Stopping teamd ..." + # Send USR1 signal to all teamd instances to stop them + # It will prepare teamd for warm-reboot + # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port + docker exec -i teamd pkill -USR1 teamd || [ $? == 1 ] > /dev/null + debug "Stopped teamd ..." +fi + +if [[ "$sonic_asic_type" = 'broadcom' ]]; then + debug "Stopping syncd on ${CURRENT_SONIC_IMAGE} ..." 
+ if [[ ${CURRENT_SONIC_IMAGE} =~ "20180330" ]]; then + # Gracefully stop syncd + docker exec -i syncd /usr/bin/syncd_request_shutdown --cold > /dev/null + + # Check that syncd was stopped + while docker top syncd | grep -q /usr/bin/syncd + do + sleep 0.1 + done + else + systemctl stop syncd + fi + debug "Stopped syncd ..." +fi + +# Kill other containers to make the reboot faster +docker ps -q | xargs docker kill > /dev/null + +# Stop the docker container engine. Otherwise we will have a broken docker storage +systemctl stop docker.service + +# Stop kernel modules for Nephos platform +if [[ "$sonic_asic_type" = 'nephos' ]]; +then + systemctl stop nps-modules-`uname -r`.service +fi + +# Update the reboot cause file to reflect that user issued this script +# Upon next boot, the contents of this file will be used to determine the +# cause of the previous reboot +echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE} + +# Wait until all buffers synced with disk +sync +sleep 1 +sync + +# sync the current system time to CMOS +if [ -x /sbin/hwclock ]; then + /sbin/hwclock -w || /bin/true +fi + +# Reboot: explicity call Linux native reboot under sbin +debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..." +exec ${REBOOT_METHOD} + +# Should never reach here +error "${REBOOT_TYPE} failed!" 
+exit "${EXIT_FAILURE}" diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 73ab543de9b..1b9ea4d636a 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -141,8 +141,17 @@ when: new_sonic_image is defined - set_fact: - stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" - cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" + stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" + cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" + replace_fast_reboot_script: "{{ replace_fast_reboot_script | default('false') | bool }}" + allow_vlan_flooding: "{{ allow_vlan_flooding | default('false') | bool }}" + + - name: msg="Replace fast-reboot script on the DUT" + copy: + src: roles/test/files/reboot/fast-reboot + dest: /usr/bin/fast-reboot + become: yes + when: new_sonic_image is defined and replace_fast_reboot_script - include: advanced_reboot/reboot-image-handle.yml when: new_sonic_image is defined diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index cfe55d66424..09ac5a61c94 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -24,6 +24,7 @@ - arista_vms=\"['{{ vm_hosts | list | join("','") }}']\" - preboot_files='{{ preboot_files }}' - preboot_oper='{{ item }}' + - allow_vlan_flooding='{{ allow_vlan_flooding }}' always: From e52117b96728d09037679d66a593623235acc35a Mon Sep 17 00:00:00 2001 From: neethajohn <48968228+neethajohn@users.noreply.github.com> Date: Thu, 27 Jun 2019 15:59:53 -0700 Subject: [PATCH 039/218] Remove fast-reboot script and related changes (#982) --- ansible/roles/test/files/reboot/fast-reboot | 484 ------------------- ansible/roles/test/tasks/advanced-reboot.yml | 8 - 2 files changed, 492 deletions(-) 
delete mode 100755 ansible/roles/test/files/reboot/fast-reboot diff --git a/ansible/roles/test/files/reboot/fast-reboot b/ansible/roles/test/files/reboot/fast-reboot deleted file mode 100755 index d2383f35dac..00000000000 --- a/ansible/roles/test/files/reboot/fast-reboot +++ /dev/null @@ -1,484 +0,0 @@ -#!/bin/bash -e - -REBOOT_USER=$(logname) -REBOOT_TIME=$(date) -REBOOT_CAUSE_FILE="/host/reboot-cause/reboot-cause.txt" -WARM_DIR=/host/warmboot -REDIS_FILE=dump.rdb -REBOOT_SCRIPT_NAME=$(basename $0) -REBOOT_TYPE="${REBOOT_SCRIPT_NAME}" -VERBOSE=no -FORCE=no -REBOOT_METHOD="/sbin/kexec -e" -ASSISTANT_IP_LIST="" -ASSISTANT_SCRIPT="/usr/bin/neighbor_advertiser" - -# Require 100M available on the hard drive for warm reboot temp files, -# Size is in 1K blocks: -MIN_HD_SPACE_NEEDED=100000 - -EXIT_SUCCESS=0 -EXIT_FAILURE=1 -EXIT_NOT_SUPPORTED=2 -EXIT_FILE_SYSTEM_FULL=3 -EXIT_NEXT_IMAGE_NOT_EXISTS=4 -EXIT_ORCHAGENT_SHUTDOWN=10 -EXIT_SYNCD_SHUTDOWN=11 -EXIT_FAST_REBOOT_DUMP_FAILURE=12 - -function error() -{ - echo $@ >&2 -} - -function debug() -{ - if [[ x"${VERBOSE}" == x"yes" ]]; then - echo `date` $@ - fi - logger "$@" -} - -function showHelpAndExit() -{ - echo "Usage: ${REBOOT_SCRIPT_NAME} [options]" - echo " -h,-? : get this help" - echo " -v : turn on verbose" - echo " -f : force execution" - echo " -r : reboot with /sbin/reboot" - echo " -k : reboot with /sbin/kexec -e [default]" - echo " -x : execute script with -x flag" - echo " -c : specify control plane assistant IP list" - - exit "${EXIT_SUCCESS}" -} - -function parseOptions() -{ - while getopts "vfh?rkxc:" opt; do - case ${opt} in - h|\? ) - showHelpAndExit - ;; - v ) - VERBOSE=yes - ;; - f ) - FORCE=yes - ;; - r ) - REBOOT_METHOD="/sbin/reboot" - ;; - k ) - REBOOT_METHOD="/sbin/kexec -e" - ;; - x ) - set -x - ;; - c ) - ASSISTANT_IP_LIST=${OPTARG} - ;; - esac - done -} - -function clear_fast_boot() -{ - debug "${REBOOT_TYPE} failure ($?) cleanup ..." 
- - /sbin/kexec -u || /bin/true - - teardown_control_plane_assistant -} - -function clear_warm_boot() -{ - clear_fast_boot - - result=`timeout 10s config warm_restart disable; if [[ $? == 124 ]]; then echo timeout; else echo "code ($?)"; fi` || /bin/true - debug "Cancel warm-reboot: ${result}" - - TIMESTAMP=`date +%Y%m%d-%H%M%S` - if [[ -f ${WARM_DIR}/${REDIS_FILE} ]]; then - mv -f ${WARM_DIR}/${REDIS_FILE} ${WARM_DIR}/${REDIS_FILE}.${TIMESTAMP} || /bin/true - fi -} - -function init_warm_reboot_states() -{ - # If the current running instanace was booted up with warm reboot. Then - # the current DB contents will likely mark warm reboot is done. - # Clear these states so that the next boot up image won't get confused. - if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - redis-cli -n 6 eval " - for _, key in ipairs(redis.call('keys', 'WARM_RESTART_TABLE|*')) do - redis.call('hdel', key, 'state') - end - " 0 >/dev/null - fi -} - -function initialize_pre_shutdown() -{ - debug "Initialize pre-shutdown ..." - TABLE="WARM_RESTART_TABLE|warm-shutdown" - RESTORE_COUNT=`/usr/bin/redis-cli -n 6 hget "${TABLE}" restore_count` - if [[ -z "$RESTORE_COUNT" ]]; then - /usr/bin/redis-cli -n 6 hset "${TABLE}" "restore_count" "0" > /dev/null - fi - /usr/bin/redis-cli -n 6 hset "${TABLE}" "state" "requesting" > /dev/null -} - -function request_pre_shutdown() -{ - debug "Requesting pre-shutdown ..." - /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --pre &> /dev/null || { - error "Failed to request pre-shutdown" - exit "${EXIT_SYNCD_SHUTDOWN}" - } -} - -function wait_for_pre_shutdown_complete_or_fail() -{ - debug "Waiting for pre-shutdown ..." - TABLE="WARM_RESTART_TABLE|warm-shutdown" - STATE="requesting" - declare -i waitcount - declare -i retrycount - waitcount=0 - retrycount=0 - # Wait up to 60 seconds for pre-shutdown to complete - while [[ ${waitcount} -lt 600 ]]; do - # timeout doesn't work with -i option of "docker exec". 
Therefore we have - # to invoke docker exec directly below. - STATE=`timeout 5s docker exec database redis-cli -n 6 hget "${TABLE}" state; if [[ $? == 124 ]]; then echo "timed out"; fi` - - if [[ x"${STATE}" == x"timed out" ]]; then - waitcount+=50 - retrycount+=1 - debug "Timed out getting pre-shutdown state (${waitcount}) retry count ${retrycount} ..." - if [[ retrycount -gt 2 ]]; then - break - fi - elif [[ x"${STATE}" != x"requesting" ]]; then - break - else - sleep 0.1 - waitcount+=1 - fi - done - - if [[ x"${STATE}" != x"pre-shutdown-succeeded" ]]; then - debug "Syncd pre-shutdown failed: ${STATE} ..." - exit "${EXIT_SYNCD_SHUTDOWN}" - fi - debug "Pre-shutdown succeeded ..." -} - -function backup_database() -{ - debug "Backing up database ..." - # Dump redis content to a file 'dump.rdb' in warmboot directory - mkdir -p $WARM_DIR - # Delete keys in stateDB except FDB_TABLE|* and WARM_RESTA* - redis-cli -n 6 eval " - for _, k in ipairs(redis.call('keys', '*')) do - if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') \ - and not string.match(k, 'WARM_RESTART_ENABLE_TABLE|') then - redis.call('del', k) - end - end - " 0 > /dev/null - redis-cli save > /dev/null - docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR - docker exec -i database rm /var/lib/redis/$REDIS_FILE -} - -function setup_control_plane_assistant() -{ - if [[ -n "${ASSISTANT_IP_LIST}" && -x ${ASSISTANT_SCRIPT} ]]; then - debug "Setting up control plane assistant: ${ASSISTANT_IP_LIST} ..." - ${ASSISTANT_SCRIPT} -s ${ASSISTANT_IP_LIST} -m set - fi -} - -function teardown_control_plane_assistant() -{ - if [[ -n "${ASSISTANT_IP_LIST}" && -x ${ASSISTANT_SCRIPT} ]]; then - debug "Tearing down control plane assistant: ${ASSISTANT_IP_LIST} ..." 
- ${ASSISTANT_SCRIPT} -s ${ASSISTANT_IP_LIST} -m reset - fi -} - -function setup_reboot_variables() -{ - # Kernel and initrd image - CURRENT_SONIC_IMAGE=$(sonic_installer list | grep "Current: " | cut -d ' ' -f 2) - NEXT_SONIC_IMAGE=$(sonic_installer list | grep "Next: " | cut -d ' ' -f 2) - IMAGE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}" - if grep -q aboot_platform= /host/machine.conf; then - KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)" - BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" - elif grep -q onie_platform= /host/machine.conf; then - KERNEL_OPTIONS=$(cat /host/grub/grub.cfg | sed "/$NEXT_SONIC_IMAGE'/,/}/"'!'"g" | grep linux) - KERNEL_IMAGE="/host$(echo $KERNEL_OPTIONS | cut -d ' ' -f 2)" - BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" - else - error "Unknown bootloader. ${REBOOT_TYPE} is not supported." - exit "${EXIT_NOT_SUPPORTED}" - fi - INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g') -} - -function reboot_pre_check() -{ - # Make sure that the file system is normal: read-write able - filename="/host/test-`date +%Y%m%d-%H%M%S`" - if [[ ! -f ${filename} ]]; then - touch ${filename} - fi - rm ${filename} - - # Make sure /host has enough space for warm reboot temp files - avail=$(df -k /host | tail -1 | awk '{ print $4 }') - if [[ ${avail} -lt ${MIN_HD_SPACE_NEEDED} ]]; then - debug "/host has ${avail}K bytes available, not enough for warm reboot." - exit ${EXIT_FILE_SYSTEM_FULL} - fi - - # Make sure that the next image exists - if [[ ! -d ${IMAGE_PATH} ]]; then - debug "Next image ${NEXT_SONIC_IMAGE} doesn't exist ..." 
- exit ${EXIT_NEXT_IMAGE_NOT_EXISTS} - fi -} - -function unload_kernel() -{ - # Unload the previously loaded kernel if any loaded - if [[ "$(cat /sys/kernel/kexec_loaded)" -eq 1 ]]; then - /sbin/kexec -u - fi -} - -# main starts here -parseOptions $@ - -# Check root privileges -if [[ "$EUID" -ne 0 ]] -then - echo "This command must be run as root" >&2 - exit "${EXIT_FAILURE}" -fi - -sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type) - -# Check reboot type supported -BOOT_TYPE_ARG="cold" -case "$REBOOT_TYPE" in - "fast-reboot") - BOOT_TYPE_ARG=$REBOOT_TYPE - trap clear_fast_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM - ;; - "warm-reboot") - if [[ "$sonic_asic_type" == "mellanox" ]]; then - REBOOT_TYPE="fastfast-reboot" - BOOT_TYPE_ARG="fastfast" - # source mlnx-ffb.sh file with - # functions to check ISSU upgrade possibility - source mlnx-ffb.sh - else - BOOT_TYPE_ARG="warm" - fi - trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM - config warm_restart enable system - ;; - *) - error "Not supported reboot type: $REBOOT_TYPE" - exit "${EXIT_NOT_SUPPORTED}" - ;; -esac - -# Stopping all SLB neighbors if they're presented -PASSIVE_BGP_NEIGHBORS=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "BGP_PEER_RANGE | list") -case "$PASSIVE_BGP_NEIGHBORS" in - *BGPSLBPassive*) - ASN=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "DEVICE_METADATA['localhost']['bgp_asn']") - SLBNAME=$(sonic-cfggen -m /etc/sonic/minigraph.xml -v "BGP_PEER_RANGE['BGPSLBPassive']['name']") - vtysh -e "configure terminal" -e "router bgp ${ASN}" -e "neighbor ${SLBNAME} shutdown" - sleep 1 - ;; - *) - ;; -esac - -unload_kernel - -setup_reboot_variables - -reboot_pre_check - -# Install new FW for mellanox platforms before control plane goes down -# So on boot switch will not spend time to upgrade FW increasing the CP downtime -if [[ "$sonic_asic_type" == "mellanox" ]]; then - MLNX_EXIT_SUCCESS=0 - MLNX_EXIT_FW_ERROR=100 - MLNX_EXIT_FFB_FAILURE=101 - - 
MLNX_FW_UPGRADE_SCRIPT="/usr/bin/mlnx-fw-upgrade.sh" - - - if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - check_ffb || { - error "Warm reboot is not supported" - exit "${MLNX_EXIT_FFB_FAILURE}" - } - fi - - debug "Prepare MLNX ASIC to ${REBOOT_TYPE}: install new FW if required" - - ${MLNX_FW_UPGRADE_SCRIPT} --upgrade - MLNX_EXIT_CODE="$?" - if [[ "${MLNX_EXIT_CODE}" != "${MLNX_EXIT_SUCCESS}" ]]; then - error "Failed to burn MLNX FW: errno=${MLNX_EXIT_CODE}" - exit "${MLNX_EXIT_FW_ERROR}" - fi -fi - -# Load kernel into the memory -/sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS" - -if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then - # Dump the ARP and FDB tables to files also as default routes for both IPv4 and IPv6 - # into /host/fast-reboot - mkdir -p /host/fast-reboot - FAST_REBOOT_DUMP_RC=0 - /usr/bin/fast-reboot-dump.py -t /host/fast-reboot || FAST_REBOOT_DUMP_RC=$? - if [[ FAST_REBOOT_DUMP_RC -ne 0 ]]; then - error "Failed to run fast-reboot-dump.py. Exit code: $FAST_REBOOT_DUMP_RC" - unload_kernel - exit "${EXIT_FAST_REBOOT_DUMP_FAILURE}" - fi -fi - -init_warm_reboot_states - -setup_control_plane_assistant - -if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - # Freeze orchagent for warm restart - # Ask orchagent_restart_check to try freeze 5 times with interval of 2 seconds, - # it is possible that the orchagent is in transient state and no opportunity to be freezed - # Note: assume that 2*5 seconds is enough for orchagent to process the request and respone freeze or not - debug "Pausing orchagent ..." - docker exec -i swss /usr/bin/orchagent_restart_check -w 2000 -r 5 > /dev/null || RESTARTCHECK_RC=$? - if [[ RESTARTCHECK_RC -ne 0 ]]; then - error "RESTARTCHECK failed" - if [[ x"${FORCE}" == x"yes" ]]; then - debug "Ignoring orchagent pausing failure ..." - else - exit "${EXIT_ORCHAGENT_SHUTDOWN}" - fi - fi -fi - -# Kill bgpd to start the bgp graceful restart procedure -debug "Stopping bgp ..." 
-docker exec -i bgp pkill -9 zebra -docker exec -i bgp pkill -9 bgpd || [ $? == 1 ] -debug "Stopped bgp ..." - -# Kill lldp, otherwise it sends informotion about reboot -docker kill lldp > /dev/null - -if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then - # Kill teamd, otherwise it gets down all LAGs - # Note: teamd must be killed before syncd, because it will send the last packet through CPU port - # TODO: stop teamd gracefully to allow teamd to send last valid update to be sure we'll have 90 seconds reboot time - docker kill teamd > /dev/null -fi - -# Kill swss dockers -docker kill swss > /dev/null - -# Pre-shutdown syncd -if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - initialize_pre_shutdown - - request_pre_shutdown - - wait_for_pre_shutdown_complete_or_fail - - # Warm reboot: dump state to host disk - if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - redis-cli -n 1 FLUSHDB > /dev/null - redis-cli -n 2 FLUSHDB > /dev/null - redis-cli -n 5 FLUSHDB > /dev/null - fi - - # TODO: backup_database preserves FDB_TABLE - # need to cleanup as well for fastfast boot case - backup_database -fi - -# Stop teamd gracefully -if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then - debug "Stopping teamd ..." - # Send USR1 signal to all teamd instances to stop them - # It will prepare teamd for warm-reboot - # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port - docker exec -i teamd pkill -USR1 teamd || [ $? == 1 ] > /dev/null - debug "Stopped teamd ..." -fi - -if [[ "$sonic_asic_type" = 'broadcom' ]]; then - debug "Stopping syncd on ${CURRENT_SONIC_IMAGE} ..." 
- if [[ ${CURRENT_SONIC_IMAGE} =~ "20180330" ]]; then - # Gracefully stop syncd - docker exec -i syncd /usr/bin/syncd_request_shutdown --cold > /dev/null - - # Check that syncd was stopped - while docker top syncd | grep -q /usr/bin/syncd - do - sleep 0.1 - done - else - systemctl stop syncd - fi - debug "Stopped syncd ..." -fi - -# Kill other containers to make the reboot faster -docker ps -q | xargs docker kill > /dev/null - -# Stop the docker container engine. Otherwise we will have a broken docker storage -systemctl stop docker.service - -# Stop kernel modules for Nephos platform -if [[ "$sonic_asic_type" = 'nephos' ]]; -then - systemctl stop nps-modules-`uname -r`.service -fi - -# Update the reboot cause file to reflect that user issued this script -# Upon next boot, the contents of this file will be used to determine the -# cause of the previous reboot -echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE} - -# Wait until all buffers synced with disk -sync -sleep 1 -sync - -# sync the current system time to CMOS -if [ -x /sbin/hwclock ]; then - /sbin/hwclock -w || /bin/true -fi - -# Reboot: explicity call Linux native reboot under sbin -debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..." -exec ${REBOOT_METHOD} - -# Should never reach here -error "${REBOOT_TYPE} failed!" 
-exit "${EXIT_FAILURE}" diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 1b9ea4d636a..93e6fa0ebbb 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -143,16 +143,8 @@ - set_fact: stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" - replace_fast_reboot_script: "{{ replace_fast_reboot_script | default('false') | bool }}" allow_vlan_flooding: "{{ allow_vlan_flooding | default('false') | bool }}" - - name: msg="Replace fast-reboot script on the DUT" - copy: - src: roles/test/files/reboot/fast-reboot - dest: /usr/bin/fast-reboot - become: yes - when: new_sonic_image is defined and replace_fast_reboot_script - - include: advanced_reboot/reboot-image-handle.yml when: new_sonic_image is defined From d3c69928348ee7f41bd8fde644d29e5a7574f83c Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 25 Jun 2019 22:20:46 +0800 Subject: [PATCH 040/218] [loganalyzer] Fix the files not copied issue in run_command_with_log_analyzer.yml (#963) The copy files task was after the fail tests. In case of failure, the copy task would never get a chance to run. This commit adjusted the task sequence. In case of failure, copy the files, then fail the test. The original copy task copies files with deep folder structure. This issue was also fixed in this commit. 
Signed-off-by: Xin Wang --- .../tasks/run_command_with_log_analyzer.yml | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ansible/roles/test/tasks/run_command_with_log_analyzer.yml b/ansible/roles/test/tasks/run_command_with_log_analyzer.yml index a9e5bd169f9..6f04d1e16d5 100644 --- a/ansible/roles/test/tasks/run_command_with_log_analyzer.yml +++ b/ansible/roles/test/tasks/run_command_with_log_analyzer.yml @@ -41,21 +41,24 @@ register: expects_found when: errors_expected == true - - name: Check that expected error messages are found (negative tests only). - fail: msg="Expected error messages are not found while running {{ testname }} / {{ command_to_run }}" - when: errors_expected == true and expects_found.stdout == "0" - - name: Get the total number of error messages. shell: grep "TOTAL MATCHES" "{{ test_out_dir }}/{{ summary_file }}" | sed -n "s/TOTAL MATCHES:[[:space:]]*//p" register: errors_found - - name: Check the number of error messages (positive tests only). - fail: msg="{{ errors_found.stdout }} errors found while running {{ testname }} / {{ command_to_run }}." - when: errors_expected == false and errors_found.stdout != "0" - - name: Copy test data to host. - fetch: src={{ test_out_dir }}/{{ item }} dest=failed-test-data/{{ testname_unique }}/{{ item }} + fetch: + src: "{{ test_out_dir }}/{{ item }}" + dest: "test/{{ inventory_hostname }}/{{ item | basename }}" + flat: yes with_items: - "{{ summary_file }}" - "{{ result_file }}" when: (errors_expected == true and expects_found.stdout == "0") or (errors_expected == false and errors_found.stdout != "0") + + - name: Check that expected error messages are found (negative tests only). + fail: msg="Expected error messages are not found while running {{ testname }} / {{ command_to_run }}" + when: errors_expected == true and expects_found.stdout == "0" + + - name: Check the number of error messages (positive tests only). 
+ fail: msg="{{ errors_found.stdout }} errors found while running {{ testname }} / {{ command_to_run }}." + when: errors_expected == false and errors_found.stdout != "0" From fdf4caa6c2e3fac8c13145a8989d47b259077377 Mon Sep 17 00:00:00 2001 From: Kebo Liu Date: Sun, 30 Jun 2019 16:42:08 +0800 Subject: [PATCH 041/218] [Fanout] Update Mellanox ONYX fanout switch deploy code and templates (#968) * fix grep ipv6 addr issue * Add Mellanox onyx fanout switch deploy yml and template * fix typo * remove debug code * revert the change to check_pfcwd_fanout.yml and deploy_pfcwd_fanout.yml * fix typo --- ansible/roles/fanout/tasks/fanout_mlnx.yml | 51 ++++++ ansible/roles/fanout/tasks/main.yml | 82 ++++----- .../templates/mlnx_check_pfcwd_fanout.j2 | 4 + .../templates/mlnx_deploy_pfcwd_fanout.j2 | 9 + ansible/roles/fanout/templates/mlnx_fanout.j2 | 165 ++++++++++++++++++ 5 files changed, 265 insertions(+), 46 deletions(-) create mode 100644 ansible/roles/fanout/tasks/fanout_mlnx.yml create mode 100644 ansible/roles/fanout/templates/mlnx_check_pfcwd_fanout.j2 create mode 100644 ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 create mode 100644 ansible/roles/fanout/templates/mlnx_fanout.j2 diff --git a/ansible/roles/fanout/tasks/fanout_mlnx.yml b/ansible/roles/fanout/tasks/fanout_mlnx.yml new file mode 100644 index 00000000000..4e2de20b6f1 --- /dev/null +++ b/ansible/roles/fanout/tasks/fanout_mlnx.yml @@ -0,0 +1,51 @@ +############################################################################################## +### playbook to deploy the fanout switch +### Use this playbook to deploy the VLAN configurations of fanout leaf switch in SONiC testbed +### This playbook will run based on hardware platform. Each fanout switch hardware type has its +### own unique feature and configuration command or format. Unless you use the hardware switch 
Unless you use the hardware swtich +### specified in this playbook, you would need to come up with your own fanout switch deployment +### playbook +################################################################################################ + # Gather minigraph facts +- name: Gathering lab graph facts about the device + conn_graph_facts: host={{ inventory_hostname }} + connection: local + tags: always + +- name: prepare fanout switch admin login info + set_fact: ansible_ssh_user={{ fanout_admin_user }} ansible_ssh_pass={{ fanout_admin_password }} peer_hwsku={{device_info['HwSku']}} + tags: always + +- set_fact: + fanout_root_user: "user" + fanout_root_pass: "password" + tags: deploy,pfcwd_config,check_pfcwd_config + + ########################################################## + # deploy tasks to deploy default configuration on fanout # + ########################################################## +- block: + - debug: msg={{ inventory_hostname }} + - name: build fanout startup config for fanout mlnx-os-switch + action: apswitch template=mlnx_fanout.j2 + connection: switch + args: + login: "{{ switch_login['MLNX-OS'] }}" + vars: + action_variable: "deploy" + when: peer_hwsku == "MLNX-OS" + tags: deploy + + ################################################################### + # build, deploy and start docker images for the PFC WD test # + ################################################################### +- include: mlnx/deploy_pfcwd_fanout.yml + when: peer_hwsku == "MLNX-OS" + tags: deploy,pfcwd_config + + ################################################################### + # check and recover docker images for the PFC WD test # + ################################################################### +- include: mlnx/check_pfcwd_fanout.yml + when: peer_hwsku == "MLNX-OS" + tags: check_pfcwd_config diff --git a/ansible/roles/fanout/tasks/main.yml b/ansible/roles/fanout/tasks/main.yml index 1eae1f3f8e0..3ed4aae8a7e 100644 --- a/ansible/roles/fanout/tasks/main.yml +++ 
b/ansible/roles/fanout/tasks/main.yml @@ -1,46 +1,36 @@ -############################################################################################## -### playbook to deploy the fanout swtich -### Use this playbook to deploy the VLAN configurations of fanout switch in SONiC testbed -### This playbook will run based on hardware flatform. Each fanout switch hardware type has its -### own unique feature and configuration command or format. Unless you use the hardware swtich -### specified in this playbook, you would need to come up with your own fanout switch deployment -### playbook -################################################################################################ -# Deploy fanout switch -- name: Gathering lab graph facts about the device - conn_graph_facts: host={{ inventory_hostname }} - connection: local - -- set_fact: sw_type="{{ device_info['Type'] }}" - -- set_fact: os='eos' - when: os is not defined - -- include: fanout_eos.yml - when: os == 'eos' - -- include: fanout_sonic.yml - when: os == 'sonic' - -- block: - - set_fact: - leaf_name: "{{ inventory_hostname }}" - leaf: "{{ ansible_host }}" - - - include: rootfanout_connect.yml - deploy_leaf=true - when: sw_type == 'FanoutLeaf' - - ################################################################### - # build, deploy and start docker images for the PFC WD test # - ################################################################### -- include: mlnx/deploy_pfcwd_fanout.yml - when: peer_hwsku == "MLNX-OS" - tags: deploy,pfcwd_config - - ################################################################### - # check and recover docker images for the PFC WD test # - ################################################################### -- include: mlnx/check_pfcwd_fanout.yml - when: peer_hwsku == "MLNX-OS" - tags: check_pfcwd_config +############################################################################################## +### playbook to deploy the fanout switch +### Use this playbook to deploy 
the VLAN configurations of fanout switch in SONiC testbed +### This playbook will run based on hardware platform. Each fanout switch hardware type has its +### own unique feature and configuration command or format. Unless you use the hardware switch +### specified in this playbook, you would need to come up with your own fanout switch deployment +### playbook +################################################################################################ +# Deploy fanout switch +- name: Gathering lab graph facts about the device + conn_graph_facts: host={{ inventory_hostname }} + connection: local + +- set_fact: sw_type="{{ device_info['Type'] }}" + +- set_fact: os='eos' + when: os is not defined + +- include: fanout_eos.yml + when: os == 'eos' + +- include: fanout_sonic.yml + when: os == 'sonic' + +- include: fanout_mlnx.yml + when: os == 'mellanox' + +- block: + - set_fact: + leaf_name: "{{ inventory_hostname }}" + leaf: "{{ ansible_host }}" + + - include: rootfanout_connect.yml + deploy_leaf=true + when: sw_type == 'FanoutLeaf' + diff --git a/ansible/roles/fanout/templates/mlnx_check_pfcwd_fanout.j2 b/ansible/roles/fanout/templates/mlnx_check_pfcwd_fanout.j2 new file mode 100644 index 00000000000..61532916d40 --- /dev/null +++ b/ansible/roles/fanout/templates/mlnx_check_pfcwd_fanout.j2 @@ -0,0 +1,4 @@ +config t + +show docker ps +show docker images diff --git a/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 b/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 new file mode 100644 index 00000000000..be7e0675f1e --- /dev/null +++ b/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 @@ -0,0 +1,9 @@ +config t + +docker no shutdown +ping -c 5 8.8.8.8 +docker label storm +docker load pfc_storm.tgz +docker load storm_args.tgz +docker start storm_args latest args init label storm privileged network sdk +docker start storm_args latest args now label storm privileged network sdk diff --git a/ansible/roles/fanout/templates/mlnx_fanout.j2 
b/ansible/roles/fanout/templates/mlnx_fanout.j2 new file mode 100644 index 00000000000..81f36278e5f --- /dev/null +++ b/ansible/roles/fanout/templates/mlnx_fanout.j2 @@ -0,0 +1,165 @@ +{% set management_interface = 'mgmt0' %} +{% set ofpvid_present = 4096 %} +{% set vlanid_offset = 100 %} +{% set vlan_range = '101-132' %} +{% set open_flow_tableid = '0' %} +{% set flowid_offset = 160 %} +{% set last_flowid = '193' %} +{% set server_to_dut_flow_priority = '100' %} +{% set dut_to_server_flow_priority = '101' %} +{% set low_priority = '1' %} +{% set eth_typ_lldp = '0x88CC' %} +{% set eth_typ_slow = '0x8809' %} +{% set eth_typ_arp = '0x0806' %} +{% set eth_typ_vlan = '0x8100' %} +{% set eth_typ_test = '0x1234' %} +{% set MTU = '9216' %} +{% set trunk_port = '1/32' %} + +{# ----------------------------Start of default port configuration------------------------ #} + +{% set default_eth_ports = ['ERR', '1/1', '1/2', '1/3', '1/4', '1/5', '1/6', '1/7', '1/8', '1/9', '1/10', '1/11', '1/12', '1/13', '1/14', '1/15', '1/16', '1/17', '1/18', '1/19', '1/20', '1/21', '1/22', '1/23', '1/24', '1/25', '1/26', '1/27', '1/28', '1/29', '1/30', '1/31/1', '1/31/2', '1/32'] %} +{% set default_of_ports = [-1, 125, 127, 121, 123, 117, 119, 113, 115, 109, 111, 105, 107, 101, 103, 97, 99, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 94, 95] -%} +{% set uplink_port_id = 33 %} + +{# ----------------------------End of default port configuration-------------------------- #} + +{# ----------------------------Start of port speed configuration-------------------------- #} + +{% set mellanox_fanout_port_speed = [] %}{# underscores only: '-' is not valid in a Jinja2 variable name #} + +{# -----------------------------End of speed configuration-------------------------------- #} + +{# -----------------------------Start of params dictionary-------------------------------- #} + +{% set qsfp_split_2_dict = {'mellanox-fanout':['1/31']} %} + +{% set qsfp_split_4_dict = {'mellanox-fanout':['1/9', '1/11', '1/13', '1/15', '1/17', '1/19', '1/21', '1/23']} %} + 
+{% set eth_ports_dict = {'mellanox-fanout':default_eth_ports} %} + +{% set of_ports_dict = {'mellanox-fanout':default_of_ports} %} + +{% set port_speed_dict = {'mellanox-fanout':mellanox_fanout_port_speed} %} + +{% set qsfp_split_2 = qsfp_split_2_dict[inventory_hostname] %} +{% set qsfp_split_4 = qsfp_split_4_dict[inventory_hostname] %} +{% set eth_ports = eth_ports_dict[inventory_hostname] %} +{% set of_ports = of_ports_dict[inventory_hostname] %} +{% set port_speed = port_speed_dict[inventory_hostname] %} + +{# ------------------------------End of params dictionary-------------------------------------- #} + +{# ------------------------------Start of fanout deploy function ------------------------------ #} + +{% macro fanout_deploy() %} +conf t +no lldp +no spanning-tree +ip routing +interface {{ management_interface }} dhcp +hostname {{ inventory_hostname }} +no fae trap-group TRAP_GROUP_LLDP policer bind +no fae trap-group TRAP_GROUP_OF_CONTROLLER policer bind + +{% for i in range(0, qsfp_split_2|length) %} +interface ethernet {{ qsfp_split_2[i] }} module-type qsfp-split-2 force +{% endfor %} + +{% for i in range(0, qsfp_split_4|length) %} +interface ethernet {{ qsfp_split_4[i] }} module-type qsfp-split-4 force +{% endfor %} + +{% for i in range(1, port_speed|length) %} +interface ethernet {{ eth_ports[i] }} speed {{ port_speed[i] }} force +{% endfor %} + +{% for i in range(1, eth_ports|length) %} +interface ethernet {{ eth_ports[i] }} shutdown +interface ethernet {{ eth_ports[i] }} mtu {{ MTU }} +interface ethernet {{ eth_ports[i] }} no shutdown +{% endfor %} + +interface ethernet {{ trunk_port }} switchport mode trunk +vlan {{ vlan_range }} +ex +interface ethernet {{ trunk_port }} switchport trunk allowed-vlan none + +{% for i in range(1,eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +interface ethernet {{ eth_ports[i] }} switchport mode dot1q-tunnel +interface ethernet {{ eth_ports[i] }} switchport access vlan {{ vlanid }} +{% endfor 
%} + +interface ethernet {{ trunk_port }} switchport trunk allowed-vlan add {{ vlan_range }} +interface ethernet {{ trunk_port }} switchport trunk allowed-vlan remove 1 + +protocol openflow + +{% for i in range(1, eth_ports|length) %} +interface ethernet {{ eth_ports[i] }} openflow mode hybrid +{% endfor %} + +{% set of_counter = 0 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ server_to_dut_flow_priority }},dl_type={{ eth_typ_lldp }},in_port={{ of_ports[uplink_port_id] }},dl_vlan={{ vlanid }},actions=strip_vlan,output:{{ of_ports[i] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (ofpvid_present + vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ dut_to_server_flow_priority }},dl_type={{ eth_typ_lldp }},in_port={{ of_ports[i] }},actions=push_vlan:{{ eth_typ_vlan }},set_field:{{ vlanid }}->vlan_vid,output:{{ of_ports[uplink_port_id] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ server_to_dut_flow_priority }},dl_type={{ eth_typ_slow }},in_port={{ of_ports[uplink_port_id] }},dl_vlan={{ vlanid }},actions=strip_vlan,output:{{ of_ports[i] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (ofpvid_present + vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ dut_to_server_flow_priority }},dl_type={{ eth_typ_slow }},in_port={{ of_ports[i] }},actions=push_vlan:{{ eth_typ_vlan }},set_field:{{ vlanid }}->vlan_vid,output:{{ 
of_ports[uplink_port_id] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ server_to_dut_flow_priority }},dl_type={{ eth_typ_arp }},in_port={{ of_ports[uplink_port_id] }},dl_vlan={{ vlanid }},actions=output:{{ of_ports[i] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ dut_to_server_flow_priority }},dl_type={{ eth_typ_arp }},in_port={{ of_ports[i] }},actions=output:{{ of_ports[uplink_port_id] }} +{% endfor %} + +{% set of_counter = of_counter + eth_ports|length-2 -%} + +{% for i in range(1, eth_ports|length-1) %} +{% set vlanid = (vlanid_offset + i)|string %} +openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ dut_to_server_flow_priority }},dl_type={{ eth_typ_test }},in_port={{ of_ports[i] }},actions=output:{{ of_ports[uplink_port_id] }} +{% endfor %} + +openflow add-flows {{ last_flowid }} table={{ open_flow_tableid }},priority={{ low_priority }},actions=normal + +docker +no shutdown +exit +write memory + +{% endmacro %} + +{# ------------------------------End of fanout deploy function -------------------------------- #} + +{% if action_variable == "deploy" %} + {{ fanout_deploy() }} +{% endif %} From e7823f19f55a760db9acb95d43e794fd5caac469 Mon Sep 17 00:00:00 2001 From: neethajohn Date: Fri, 28 Jun 2019 15:13:35 -0700 Subject: [PATCH 042/218] [warm-reboot] Fix the issue where BGP info was not being extracted from neigh logs (#974) --- ansible/roles/test/files/ptftests/arista.py | 36 +++++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/ansible/roles/test/files/ptftests/arista.py 
b/ansible/roles/test/files/ptftests/arista.py index a8aaa0fa738..e0bef368a54 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -47,6 +47,7 @@ def __init__(self, ip, queue, test_params, login='admin', password='123456'): self.fails = set() self.info = set() self.min_bgp_gr_timeout = int(test_params['min_bgp_gr_timeout']) + self.reboot_type = test_params['reboot_type'] def __del__(self): self.disconnect() @@ -153,15 +154,19 @@ def run(self): } attempts = 60 + log_present = False for _ in range(attempts): log_output = self.do_cmd("show log | begin %s" % log_first_line) log_lines = log_output.split("\r\n")[1:-1] log_data = self.parse_logs(log_lines) - if len(log_data) != 0: + if (self.reboot_type == 'fast-reboot' and \ + any(k.startswith('BGP') for k in log_data) and any(k.startswith('PortChannel') for k in log_data)) \ + or (self.reboot_type == 'warm-reboot' and any(k.startswith('BGP') for k in log_data)): + log_present = True break time.sleep(1) # wait until logs are populated - if len(log_data) == 0: + if not log_present: log_data['error'] = 'Incomplete output' self.disconnect() @@ -226,7 +231,10 @@ def parse_logs(self, data): result['route_timeout'] = result_rt - if initial_time_bgp == -1 or initial_time_if == -1: + # for fast-reboot, we expect to have both the bgp and portchannel events in the logs. for warm-reboot, portchannel events might not be present in the logs all the time. + if self.reboot_type == 'fast-reboot' and (initial_time_bgp == -1 or initial_time_if == -1): + return result + elif self.reboot_type == 'warm-reboot' and initial_time_bgp == -1: return result for events in result_bgp.values(): @@ -245,13 +253,7 @@ def parse_logs(self, data): assert(events[0][1] == 'down') assert(events[-1][1] == 'up') - po_name = [ifname for ifname in result_if.keys() if 'Port-Channel' in ifname][0] neigh_ipv4 = [neig_ip for neig_ip in result_bgp.keys() if '.' 
in neig_ip][0] - - result['PortChannel was down (seconds)'] = result_if[po_name][-1][0] - result_if[po_name][0][0] - for if_name in sorted(result_if.keys()): - result['Interface %s was down (times)' % if_name] = map(itemgetter(1), result_if[if_name]).count("down") - for neig_ip in result_bgp.keys(): key = "BGP IPv6 was down (seconds)" if ':' in neig_ip else "BGP IPv4 was down (seconds)" result[key] = result_bgp[neig_ip][-1][0] - result_bgp[neig_ip][0][0] @@ -260,12 +262,18 @@ def parse_logs(self, data): key = "BGP IPv6 was down (times)" if ':' in neig_ip else "BGP IPv4 was down (times)" result[key] = map(itemgetter(1), result_bgp[neig_ip]).count("Idle") - bgp_po_offset = (initial_time_if - initial_time_bgp if initial_time_if > initial_time_bgp else initial_time_bgp - initial_time_if).seconds - result['PortChannel went down after bgp session was down (seconds)'] = bgp_po_offset + result_if[po_name][0][0] + if initial_time_if != -1: + po_name = [ifname for ifname in result_if.keys() if 'Port-Channel' in ifname][0] + result['PortChannel was down (seconds)'] = result_if[po_name][-1][0] - result_if[po_name][0][0] + for if_name in sorted(result_if.keys()): + result['Interface %s was down (times)' % if_name] = map(itemgetter(1), result_if[if_name]).count("down") - for neig_ip in result_bgp.keys(): - key = "BGP IPv6 was gotten up after Po was up (seconds)" if ':' in neig_ip else "BGP IPv4 was gotten up after Po was up (seconds)" - result[key] = result_bgp[neig_ip][-1][0] - bgp_po_offset - result_if[po_name][-1][0] + bgp_po_offset = (initial_time_if - initial_time_bgp if initial_time_if > initial_time_bgp else initial_time_bgp - initial_time_if).seconds + result['PortChannel went down after bgp session was down (seconds)'] = bgp_po_offset + result_if[po_name][0][0] + + for neig_ip in result_bgp.keys(): + key = "BGP IPv6 was gotten up after Po was up (seconds)" if ':' in neig_ip else "BGP IPv4 was gotten up after Po was up (seconds)" + result[key] = 
result_bgp[neig_ip][-1][0] - bgp_po_offset - result_if[po_name][-1][0] return result From a888eff7ff4269338bdcb08c4eb7e21d6108f3f9 Mon Sep 17 00:00:00 2001 From: Nazarii Hnydyn Date: Wed, 26 Jun 2019 23:42:27 +0300 Subject: [PATCH 043/218] Improved link flap test: added smart timeout. (#977) Signed-off-by: Nazarii Hnydyn --- ansible/roles/test/tasks/link_flap.yml | 5 ++-- .../test/tasks/link_flap/link_flap_helper.yml | 28 ++++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ansible/roles/test/tasks/link_flap.yml b/ansible/roles/test/tasks/link_flap.yml index 69ad611761b..2a1ae0256a3 100644 --- a/ansible/roles/test/tasks/link_flap.yml +++ b/ansible/roles/test/tasks/link_flap.yml @@ -7,8 +7,9 @@ connection: local tags: always -- set_fact: - neighbors: "{{device_conn}}" +- name: Set neighbor facts + set_fact: + neighbors: "{{ device_conn }}" - include: link_flap/link_flap_helper.yml with_items: "{{ device_conn.keys() }}" diff --git a/ansible/roles/test/tasks/link_flap/link_flap_helper.yml b/ansible/roles/test/tasks/link_flap/link_flap_helper.yml index 9ef6ba54d91..ef3ae6dd81f 100644 --- a/ansible/roles/test/tasks/link_flap/link_flap_helper.yml +++ b/ansible/roles/test/tasks/link_flap/link_flap_helper.yml @@ -3,10 +3,14 @@ # flapped. 
- block: - - set_fact: - interface: "{{item}}" + - name: Set interface name + set_fact: + interface: "{{ item }}" - - debug: msg={{interface}} + - name: Set default link timeout + set_fact: + link_timeout: 20 + link_delay: 5 - set_fact: peer_device: "{{neighbors[interface]['peerdevice']}}" @@ -42,8 +46,13 @@ delegate_to: "{{peer_host}}" when: peer_type == "FanoutLeafSonic" - - pause: - seconds: 20 + - name: Wait until interface {{ interface }} on {{ inventory_hostname }} is down + interface_facts: up_ports="[ '{{ interface }}' ]" + register: out + until: out.ansible_facts.ansible_interface_link_down_ports | length > 0 + retries: "{{ (link_timeout / link_delay) | round(0, 'ceil') | int }}" + delay: "{{ link_delay }}" + when: "interface in minigraph_ports.keys()" - interface_facts: up_ports={{minigraph_ports | difference(intfs_to_exclude)}} @@ -71,8 +80,13 @@ delegate_to: "{{peer_host}}" when: peer_type == "FanoutLeafSonic" - - pause: - seconds: 20 + - name: Wait until interface {{ interface }} on {{ inventory_hostname }} is up + interface_facts: up_ports="[ '{{ interface }}' ]" + register: out + until: out.ansible_facts.ansible_interface_link_down_ports | length == 0 + retries: "{{ (link_timeout / link_delay) | round(0, 'ceil') | int }}" + delay: "{{ link_delay }}" + when: "interface in minigraph_ports.keys()" - interface_facts: up_ports={{minigraph_ports}} From ca9f36eb5db8e6ef7aaaade61bd1fe1c8fc7ef36 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 10 Jul 2019 15:08:13 +0800 Subject: [PATCH 044/218] [loganalyzer] Generate dump of log within 1 hour by default if failed (#997) By default the log analyzer generates a dump which collects all the available log files in case of failure. This is unnecessary and the dump file could be too big. This fix is to generate a dump that collects logs from within the last hour by default. If more logs are needed, the parameter 'dump_since' can be used. 
Signed-off-by: Xin Wang --- ansible/roles/test/files/tools/loganalyzer/loganalyzer.py | 6 +++--- .../test/files/tools/loganalyzer/loganalyzer_end.yml | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py index 9b631f612d2..8875e874b47 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py @@ -616,9 +616,9 @@ def main(argv): analyzer.place_marker(log_file_list, analyzer.create_start_marker()) return 0 elif (action == "analyze"): - match_file_list = match_files_in.split(tokenizer); - ignore_file_list = ignore_files_in.split(tokenizer); - expect_file_list = expect_files_in.split(tokenizer); + match_file_list = match_files_in.split(tokenizer) + ignore_file_list = ignore_files_in.split(tokenizer) + expect_file_list = expect_files_in.split(tokenizer) analyzer.place_marker(log_file_list, analyzer.create_end_marker()) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml index 2dd4d5826ba..d3ab2301feb 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml @@ -21,10 +21,14 @@ register: expected_missing_matches - set_fact: - fail_in_logs: "{{ errors_found.stdout != \"0\" or expected_missing_matches.stdout != \"0\" }}" + fail_in_logs: "{{ errors_found.stdout != \"0\" or expected_missing_matches.stdout != \"0\" }}" + +- set_fact: + dump_since: '1 hour ago' + when: dump_since is not defined - name: Generate system dump - command: generate_dump + command: "generate_dump -s '{{ dump_since }}'" become: true register: generate_dump when: fail_in_logs From e7bb1fd853180f6821bc88c19198dd95afbf334b Mon Sep 17 00:00:00 2001 From: neethajohn Date: Wed, 10 Jul 2019 09:05:21 -0700 Subject: [PATCH 
045/218] [fast-reboot] Upgrade FW for mellanox before fast-reboot (#1000) * Upgrade FW for mellanox before fast-reboot * Move some condition check to the main file --- .../roles/test/files/mlnx/upgrade_mlnx_fw.sh | 40 +++++++++++++++++++ ansible/roles/test/tasks/advanced-reboot.yml | 6 +++ .../tasks/advanced_reboot/upgrade_mlnx_fw.yml | 25 ++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 ansible/roles/test/files/mlnx/upgrade_mlnx_fw.sh create mode 100644 ansible/roles/test/tasks/advanced_reboot/upgrade_mlnx_fw.yml diff --git a/ansible/roles/test/files/mlnx/upgrade_mlnx_fw.sh b/ansible/roles/test/files/mlnx/upgrade_mlnx_fw.sh new file mode 100644 index 00000000000..d517ef941ef --- /dev/null +++ b/ansible/roles/test/files/mlnx/upgrade_mlnx_fw.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e + +EXIT_FS_MOUNT_FAILED=111 +EXIT_FS_UNMOUNT_FAILED=112 +EXIT_MLNX_QUERY_FAILED=113 +EXIT_NO_FW_INFO=114 +EXIT_MLNX_FW_UPGRADE_FAILED=115 + +upgradeMLNXFW() { + FS_PATH="/host/image-${TARGET_FW#SONiC-OS-}/fs.squashfs" + FS_MOUNTPOINT="/tmp/image-${TARGET_FW#SONiC-OS-}-fs" + + mkdir -p "${FS_MOUNTPOINT}" + mount -t squashfs "${FS_PATH}" "${FS_MOUNTPOINT}" || exit ${EXIT_FS_MOUNT_FAILED} + + FW_FILE="${FS_MOUNTPOINT}/etc/mlnx/fw-SPC.mfa" + FW_QUERY="/tmp/mlnxfwmanager-query.txt" + + mlxfwmanager --query -i "${FW_FILE}" > "${FW_QUERY}" || exit ${EXIT_MLNX_QUERY_FAILED} + + FW_INFO="$(grep FW ${FW_QUERY})" + FW_CURRENT="$(echo ${FW_INFO} | cut -f2 -d' ')" + FW_AVAILABLE="$(echo ${FW_INFO} | cut -f3 -d' ')" + + [ -z "${FW_CURRENT}" -o -z "${FW_AVAILABLE}" ] && exit ${EXIT_NO_FW_INFO} + + if [ "${FW_CURRENT}" == "${FW_AVAILABLE}" ]; then + echo "Mellanox firmware is up to date" + else + echo "Mellanox firmware upgrade is required. Installing compatible version..." 
+ mlxfwmanager -i "${FW_FILE}" -u -f -y || exit ${EXIT_MLNX_FW_UPGRADE_FAILED} + fi + + umount -rf "${FS_MOUNTPOINT}" || exit ${EXIT_FS_UNMOUNT_FAILED} + rm -rf "${FS_MOUNTPOINT}" +} + +TARGET_FW=$(sonic_installer list | grep "Next: " | cut -d ' ' -f 2) + +upgradeMLNXFW diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 93e6fa0ebbb..44931ecc374 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -148,6 +148,12 @@ - include: advanced_reboot/reboot-image-handle.yml when: new_sonic_image is defined + - include: advanced_reboot/upgrade_mlnx_fw.yml + when: + - new_sonic_image is defined + - reboot_type == "fast-reboot" + - minigraph_hwsku is defined and minigraph_hwsku in mellanox_hwskus + - include: ptf_runner_reboot.yml with_items: "{{ preboot_list }}" diff --git a/ansible/roles/test/tasks/advanced_reboot/upgrade_mlnx_fw.yml b/ansible/roles/test/tasks/advanced_reboot/upgrade_mlnx_fw.yml new file mode 100644 index 00000000000..3bbb0282aa5 --- /dev/null +++ b/ansible/roles/test/tasks/advanced_reboot/upgrade_mlnx_fw.yml @@ -0,0 +1,25 @@ +- name: Get current image version + shell: sonic_installer list | grep Current | cut -f2 -d " " + register: cur_image + become: true + +- name: Get next image version + shell: sonic_installer list | grep Next | cut -f2 -d " " + register: next_image + become: true + +- set_fact: + current_sonic_image={{cur_image.stdout}} + next_sonic_image={{next_image.stdout}} + +- debug: + msg: "current: {{current_sonic_image}} next: {{next_sonic_image}} reboot: {{reboot_type}}" + +- block: + - name: Run the FW upgrade script + script: roles/test/files/mlnx/upgrade_mlnx_fw.sh + become: true + + when: + - current_sonic_image | search('SONiC-OS-201803') + - next_sonic_image | search('SONiC-OS-201811') From 3b52d800511c366e6ac49bb4a628a3e0f0f8ecfc Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 11 Jul 2019 13:27:54 -0700 Subject: 
[PATCH 046/218] [warm reboot] save config after upgrading into new image (#1003) * [warm/fast reboot] make sure that /etc/sonic/config_db.json exists after upgrade Signed-off-by: Ying Xie * [warm reboot] save config after warm reboot into new image When new image is defined, test removed /host/config_db.json before warm rebooting. So after the device boots up, it will miss /etc/sonic/config_db.json. It is not an issue for the device to stay up. But it will be an issue when the device reboots again (cold or fast). Signed-off-by: Ying Xie * review comments --- ansible/roles/test/tasks/advanced-reboot.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 44931ecc374..e9531d3793f 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -157,6 +157,26 @@ - include: ptf_runner_reboot.yml with_items: "{{ preboot_list }}" + # When new image is defined, test removed /host/config_db.json + # before warm rebooting. So after the device boots up, it will + # miss /etc/sonic/config_db.json. It is not an issue for the + # device to stay up. But it will be an issue when device reboot + # again (cold or fast). 
+ - name: Save configuration after warm rebooting into new image + shell: config save -y + become: yes + when: + - new_sonic_image is defined + - reboot_type == "warm-reboot" + + - name: check /etc/sonic/config_db.json existence + stat: + path: /etc/sonic/config_db.json + register: stat_result + + - fail: msg="/etc/sonic/config_db.json is missing" + when: not stat_result.stat.exists + always: - name: Remove existing ip from ptf host script: roles/test/files/helpers/remove_ip.sh From b37eeab5ee30f60fa24360c8cf224d93cbb62312 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Mon, 29 Apr 2019 17:39:03 -0700 Subject: [PATCH 047/218] [everflow]: Replace deprecated mirror_session.py file with CLI (#895) mirror_session.py file is deprecated; use config mirror_session command instead Signed-off-by: Shu0T1an ChenG --- .../test/files/helpers/mirror_session.py | 50 ------------------- .../tasks/everflow_testbed/apply_config.yml | 6 +-- .../tasks/everflow_testbed/del_config.yml | 6 +-- 3 files changed, 4 insertions(+), 58 deletions(-) delete mode 100644 ansible/roles/test/files/helpers/mirror_session.py diff --git a/ansible/roles/test/files/helpers/mirror_session.py b/ansible/roles/test/files/helpers/mirror_session.py deleted file mode 100644 index 04b3f4e393a..00000000000 --- a/ansible/roles/test/files/helpers/mirror_session.py +++ /dev/null @@ -1,50 +0,0 @@ -import click -from swsssdk import ConfigDBConnector - - -@click.group() -def cli(): - pass - - -@cli.command() -@click.argument('session_name', type=click.STRING, required=True) -@click.argument('src_ip', type=click.STRING, required=True) -@click.argument('dst_ip', type=click.STRING, required=True) -@click.argument('gre_type', type=click.STRING, required=True) -@click.argument('dscp', type=click.STRING, required=True) -@click.argument('ttl', type=click.STRING, required=True) -@click.argument('queue', type=click.STRING, required=True) -def create(session_name, src_ip, dst_ip, gre_type, dscp, ttl, queue): - """ - Create 
mirror session. - """ - configdb = ConfigDBConnector() - configdb.connect() - - session_info = { - "src_ip": src_ip, - "dst_ip": dst_ip, - "gre_type": gre_type, - "dscp": dscp, - "ttl": ttl, - "queue": queue - } - - configdb.set_entry("MIRROR_SESSION", session_name, session_info) - - -@cli.command() -@click.argument('session_name', type=click.STRING, required=False) -def delete(session_name): - """ - Delete mirror session. - """ - configdb = ConfigDBConnector() - configdb.connect() - - configdb.set_entry("MIRROR_SESSION", session_name, None) - - -if __name__ == "__main__": - cli() diff --git a/ansible/roles/test/tasks/everflow_testbed/apply_config.yml b/ansible/roles/test/tasks/everflow_testbed/apply_config.yml index dde5b04e9d7..c75e1b0337d 100644 --- a/ansible/roles/test/tasks/everflow_testbed/apply_config.yml +++ b/ansible/roles/test/tasks/everflow_testbed/apply_config.yml @@ -8,13 +8,11 @@ - name: Get session info. include: roles/test/tasks/everflow_testbed/get_session_info.yml -- name: Copy python script for session configuration. - copy: src=roles/test/files/helpers/mirror_session.py dest={{ run_dir }}/ - - name: Copy ACL rules configuration file. 
copy: src={{ tests_location }}/{{ testname}}/acl_rule_persistent.json dest={{ run_dir }}/ -- command: "python {{ run_dir }}/mirror_session.py create {{ session_name }} {{ session_src_ip }} {{ session_dst_ip }} {{ session_gre }} {{ session_dscp }} {{ session_ttl }} {{ session_queue }}" +- command: "config mirror_session add {{session_name}} {{session_src_ip}} {{session_dst_ip}} {{session_dscp}} {{session_ttl}} {{session_gre}} {{session_queue}}" + become: yes - command: "acl-loader update full {{ run_dir }}/acl_rule_persistent.json --session_name={{ session_name }}" become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/del_config.yml b/ansible/roles/test/tasks/everflow_testbed/del_config.yml index 07b688e1d3a..4bcff2f2d11 100644 --- a/ansible/roles/test/tasks/everflow_testbed/del_config.yml +++ b/ansible/roles/test/tasks/everflow_testbed/del_config.yml @@ -8,13 +8,11 @@ - name: Get session info. include: roles/test/tasks/everflow_testbed/get_session_info.yml -- name: Copy python script for session configuration. - copy: src=roles/test/files/helpers/mirror_session.py dest={{ run_dir }}/ - - name: Copy ACL rules configuration file. copy: src={{ tests_location }}/{{ testname}}/acl_rule_persistent-del.json dest={{ run_dir }}/ - command: "acl-loader update full {{ run_dir }}/acl_rule_persistent-del.json" become: yes -- command: "python {{ run_dir }}/mirror_session.py delete {{ session_name }}" +- command: "config mirror_session remove {{session_name}}" + become: yes From d61d21ce94a88c85269f169602f365cfb7ff16eb Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Mon, 20 May 2019 13:24:34 -0700 Subject: [PATCH 048/218] [everflow]: Change the test command from ip route to vtysh (#920) Due to the current issues with ip route change with FRR, change all the ip route commands to vtysh commands. Remove the current testcase_6 since it's overlapped with testcase_8. 
Signed-off-by: Shu0T1an ChenG --- .../everflow_testbed/get_session_info.yml | 2 +- .../test/tasks/everflow_testbed/run_test.yml | 6 +- .../tasks/everflow_testbed/testcase_1.yml | 4 +- .../tasks/everflow_testbed/testcase_2.yml | 14 +++-- .../tasks/everflow_testbed/testcase_3.yml | 11 ++-- .../tasks/everflow_testbed/testcase_4.yml | 4 +- .../tasks/everflow_testbed/testcase_5.yml | 4 +- .../tasks/everflow_testbed/testcase_6.yml | 33 ++++++++--- .../tasks/everflow_testbed/testcase_7.yml | 15 ++--- .../tasks/everflow_testbed/testcase_8.yml | 55 ------------------- 10 files changed, 58 insertions(+), 90 deletions(-) delete mode 100644 ansible/roles/test/tasks/everflow_testbed/testcase_8.yml diff --git a/ansible/roles/test/tasks/everflow_testbed/get_session_info.yml b/ansible/roles/test/tasks/everflow_testbed/get_session_info.yml index 6642472747e..a1fcd77c058 100644 --- a/ansible/roles/test/tasks/everflow_testbed/get_session_info.yml +++ b/ansible/roles/test/tasks/everflow_testbed/get_session_info.yml @@ -13,7 +13,7 @@ session_gre: "0x8949" when: sonic_hwsku in mellanox_hwskus -- debug: msg="session name {{ session_name }}" +- debug: msg=session name {{ session_name }} - set_fact: addr_1: "{{ session_dst_ip }}/24" diff --git a/ansible/roles/test/tasks/everflow_testbed/run_test.yml b/ansible/roles/test/tasks/everflow_testbed/run_test.yml index bc72ced85fd..5a8f5db46ad 100644 --- a/ansible/roles/test/tasks/everflow_testbed/run_test.yml +++ b/ansible/roles/test/tasks/everflow_testbed/run_test.yml @@ -12,7 +12,7 @@ minigraph_facts: host={{ inventory_hostname }} - name: Print neighbors in minigraph - debug: msg="{{ minigraph_neighbors }}" + debug: msg={{ minigraph_neighbors }} - set_fact: testname: everflow_testbed @@ -44,7 +44,7 @@ delegate_to: "{{ ptf_host }}" - name: Add route to unresolved next hop. 
- shell: ip route add {{ unresolved_nexthop_prefix }} dev {{ dst_port_2 }} + shell: vtysh -e "conf t" -e "ip route {{ unresolved_nexthop_prefix }} {{ dst_port_2 }}" become: yes - name: Run testcase 1 - Resolved route @@ -74,7 +74,7 @@ always: - name: Remove route to unresolved next hop. - shell: ip route del {{ unresolved_nexthop_prefix }} + shell: vtysh -e "conf t" -e "no ip route {{ unresolved_nexthop_prefix }} {{ dst_port_2 }}" become: yes - include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml index 1da85f49817..8decdab8395 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml @@ -2,7 +2,7 @@ # Verify that session with resolved route has active state. - name: Create route with next hop {{ dst_port_1 }}. - shell: ip route add {{ session_prefix_1 }} via {{ neighbor_info_1['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" become: yes - block: @@ -15,5 +15,5 @@ always: - name: Remove route - shell: ip route del {{ session_prefix_1 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml index ae69cf97db8..dab483f837d 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml @@ -3,7 +3,7 @@ - block: - name: Create route with next hop on {{ dst_port_1 }}. 
- shell: ip route add {{ session_prefix_1 }} via {{ neighbor_info_1['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -13,7 +13,7 @@ register: out - name: Create route with best match and unresolved next hop. - shell: ip route add {{ session_prefix_2 }} via {{ unresolved_nexthop }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_2 }} {{ unresolved_nexthop }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -23,7 +23,7 @@ register: out - name: Create route with best match prefix and resolved next hop on destination port {{ dst_port_2 }}. 
- shell: ip route change {{ session_prefix_2 }} via {{ neighbor_info_2['addr'] }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ unresolved_nexthop }}" -e "ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -35,10 +35,14 @@ always: - name: Remove route. - shell: ip route del {{ session_prefix_1 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" ignore_errors: yes - name: Remove best match route. - shell: ip route del {{ session_prefix_2 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ unresolved_nexthop }}" + ignore_errors: yes + + - name: Remove best match route. + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" ignore_errors: yes become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml index b5894488cb4..a96f371f827 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml @@ -3,7 +3,7 @@ - block: - name: Create route with next hop on {{ dst_port_1 }}. 
- shell: ip route add {{ session_prefix_1 }} via {{ neighbor_info_1['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -13,7 +13,7 @@ register: out - name: Create route with best match prefix and resolved next hop {{ dst_port_2 }}. - shell: ip route add {{ session_prefix_2 }} via {{ neighbor_info_2['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_2}}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -23,8 +23,7 @@ register: out - name: Remove best match route. - shell: ip route del {{ session_prefix_2 }} - ignore_errors: yes + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -36,10 +35,10 @@ always: - name: Remove route. - shell: ip route del {{ session_prefix_1 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" ignore_errors: yes - name: Remove best match route. - shell: ip route del {{ session_prefix_2 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" ignore_errors: yes become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml index e67912cc5de..a8af1c15ed9 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml @@ -3,7 +3,7 @@ - block: - name: Create route with next hop on {{ dst_port_1 }}. - shell: ip route add {{ session_prefix_1 }} via {{ neighbor_info_1['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";expected_dst_mac="{{ neighbor_mac_1 }}";verbose=True' @@ -31,6 +31,6 @@ shell: ping {{ neighbor_info_1['addr'] }} -c3 - name: Remove route. - shell: ip route del {{ session_prefix_1 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" ignore_errors: yes become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml index aa128dc3417..1e4e9c2ecc4 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml @@ -1,7 +1,7 @@ # Test case 5 - Resolved ECMP route. - name: Create ECMP route with next hops on {{ dst_port_1 }} and {{ dst_port_2 }}. 
- shell: ip route add {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" become: yes - block: @@ -14,5 +14,5 @@ always: - name: Remove route - shell: ip route del {{ session_prefix_1 }} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml index 4fafe2b658a..3839bc5ca2d 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml @@ -1,9 +1,9 @@ -# Test case 6 - ECMP route change (add next hop). -# Verify that insertion of additional next hop to ECMP group doesn't affects session DST MAC and port. +# Test case 8 - ECMP route change (remove next hop not used by session). +# Verify that after removal of next hop that was used by session from ECMP route session state is active. - block: - name: Create ECMP route with next hops on {{ dst_port_1 }} and {{ dst_port_2 }}. - shell: ip route add {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -12,8 +12,26 @@ delegate_to: "{{ ptf_host }}" register: out - - name: Add next hop on {{ dst_port_3 }} to ECMP route. - shell: ip route change {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} via {{ neighbor_info_3['addr'] }} + - name: Add next hop to ECMP route. + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. + shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' + args: + chdir: /root + delegate_to: "{{ ptf_host }}" + register: out + + - name: Send traffic and verify that packets are not received on {{ dst_port_3 }}. 
+ shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_3_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' + args: + chdir: /root + delegate_to: "{{ ptf_host }}" + register: out + failed_when: out.rc == 0 + + - name: Delete next hop from ECMP route. + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -33,5 +51,6 @@ always: - name: Remove route - shell: ip route del {{ session_prefix_1 }} - become: yes + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + ignore_errors: yes + become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml index 4c628e11292..c8f7cfae972 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml @@ -3,7 +3,7 @@ - block: - name: Create route 
with next hop on {{ dst_port_1 }}. - shell: ip route add {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -13,7 +13,7 @@ register: out - name: Add next hops on {{ dst_port_2 }} and {{ dst_port_3 }} to route. - shell: ip route change {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} via {{ neighbor_info_3['addr'] }} + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -30,8 +30,8 @@ register: out failed_when: out.rc == 0 - - name: Delete next hop from ECMP route. 
- shell: ip route change {{ session_prefix_1 }} nexthop via {{ neighbor_info_2['addr'] }} via {{ neighbor_info_3['addr'] }} + - name: Delete one next hop from ECMP route. + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - name: Send traffic and verify that packets are not received {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' @@ -50,6 +50,7 @@ become: yes always: - - name: Remove route - shell: ip route del {{ session_prefix_1 }} - become: yes + - name: Remove route {{session_prefix_1}} + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + ignore_errors: yes + become: yes diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml deleted file mode 100644 index ee99a10c219..00000000000 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml +++ /dev/null @@ -1,55 +0,0 @@ -# Test case 8 - ECMP route change (remove next hop not used by session). -# Verify that after removal of next hop that was used by session from ECMP route session state is active. - -- block: - - name: Create ECMP route with next hops on {{ dst_port_1 }} and {{ dst_port_2 }}. 
- shell: ip route add {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} - - - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. - shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' - args: - chdir: /root - delegate_to: "{{ ptf_host }}" - register: out - - - name: Add next hop to ECMP route. - shell: ip route change {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} via {{ neighbor_info_3['addr'] }} - - - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. - shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' - args: - chdir: /root - delegate_to: "{{ ptf_host }}" - register: out - - - name: Send traffic and verify that packets are not received on {{ dst_port_3 }}. 
- shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_3_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' - args: - chdir: /root - delegate_to: "{{ ptf_host }}" - register: out - failed_when: out.rc == 0 - - - name: Delete next hop from ECMP route. - shell: ip route change {{ session_prefix_1 }} nexthop via {{ neighbor_info_1['addr'] }} via {{ neighbor_info_2['addr'] }} - - - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. - shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' - args: - chdir: /root - delegate_to: "{{ ptf_host }}" - register: out - - - name: Send traffic and verify that packets are not received on {{ dst_port_3 }}. 
- shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_3_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' - args: - chdir: /root - delegate_to: "{{ ptf_host }}" - register: out - failed_when: out.rc == 0 - become: yes - - always: - - name: Remove route - shell: ip route del {{ session_prefix_1 }} - become: yes From 9057b932b9d7b20aa068f935c6c6d21d2aeeaafe Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Mon, 20 May 2019 13:44:14 -0700 Subject: [PATCH 049/218] [everflow]: Remove deprecated tests (#923) --- ansible/roles/test/tasks/everflow_testbed/run_test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ansible/roles/test/tasks/everflow_testbed/run_test.yml b/ansible/roles/test/tasks/everflow_testbed/run_test.yml index 5a8f5db46ad..c0b129b0e56 100644 --- a/ansible/roles/test/tasks/everflow_testbed/run_test.yml +++ b/ansible/roles/test/tasks/everflow_testbed/run_test.yml @@ -69,9 +69,6 @@ - name: Run testcase 7 - ECMP route change (remove next hop used by session). include: roles/test/tasks/everflow_testbed/testcase_7.yml - - name: Run testcase 8 - ECMP route change (remove next hop not used by session). - include: roles/test/tasks/everflow_testbed/testcase_8.yml - always: - name: Remove route to unresolved next hop. 
shell: vtysh -e "conf t" -e "no ip route {{ unresolved_nexthop_prefix }} {{ dst_port_2 }}" From 922b2699c25a7e395748b93bd4b14a982ae190e4 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 28 May 2019 15:42:03 -0700 Subject: [PATCH 050/218] [everflow]: Remove unused variables (#931) Signed-off-by: Shu0T1an ChenG --- .../roles/test/tasks/everflow_testbed/get_port_info.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml b/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml index a6d46508f80..1779766979f 100644 --- a/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml +++ b/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml @@ -20,10 +20,6 @@ with_dict: "{{ minigraph_neighbors }}" when: "'T0' in item.value.name" -- name: Sort tor ports by name. - set_fact: - tor_port: "{{ tor_ports|sort }}" - - name: Print tor ports debug: msg="{{ tor_ports }}" @@ -33,10 +29,6 @@ with_dict: "{{ minigraph_neighbors }}" when: "'T2' in item.value.name" -- name: Sort tor ports by name. 
- set_fact: - tor_port: "{{ spine_ports|sort }}" - - name: Print spine ports debug: msg="{{ spine_ports }}" From d0440a71bdac215e80469e14a156cc1b67ac15e0 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 11 Jun 2019 14:25:27 -0700 Subject: [PATCH 051/218] [everflow]: Add pause after route change (#942) Stabilize the test by adding pause after the route change Signed-off-by: Shu0T1an ChenG --- ansible/roles/test/tasks/everflow_testbed/testcase_1.yml | 3 +++ ansible/roles/test/tasks/everflow_testbed/testcase_2.yml | 9 +++++++++ ansible/roles/test/tasks/everflow_testbed/testcase_3.yml | 9 +++++++++ ansible/roles/test/tasks/everflow_testbed/testcase_4.yml | 6 ++++++ ansible/roles/test/tasks/everflow_testbed/testcase_5.yml | 3 +++ ansible/roles/test/tasks/everflow_testbed/testcase_6.yml | 9 +++++++++ ansible/roles/test/tasks/everflow_testbed/testcase_7.yml | 9 +++++++++ 7 files changed, 48 insertions(+) diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml index 8decdab8395..74c4142eef9 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_1.yml @@ -5,6 +5,9 @@ shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" become: yes +- pause: + seconds: 3 + - block: - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml index dab483f837d..84f414e1363 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_2.yml @@ -5,6 +5,9 @@ - name: Create route with next hop on {{ dst_port_1 }}. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -15,6 +18,9 @@ - name: Create route with best match and unresolved next hop. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_2 }} {{ unresolved_nexthop }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -25,6 +31,9 @@ - name: Create route with best match prefix and resolved next hop on destination port {{ dst_port_2 }}. shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ unresolved_nexthop }}" -e "ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml index a96f371f827..0eefbcc3cf2 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_3.yml @@ -5,6 +5,9 @@ - name: Create route with next hop on {{ dst_port_1 }}. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -15,6 +18,9 @@ - name: Create route with best match prefix and resolved next hop {{ dst_port_2 }}. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_2}}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -25,6 +31,9 @@ - name: Remove best match route. shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_2 }} {{ neighbor_info_2['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml index a8af1c15ed9..b388ffdd7d6 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_4.yml @@ -5,6 +5,9 @@ - name: Create route with next hop on {{ dst_port_1 }}. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";expected_dst_mac="{{ neighbor_mac_1 }}";verbose=True' args: @@ -15,6 +18,9 @@ - name: Change neighbor MAC address. shell: ip neigh replace {{ neighbor_info_1['addr'] }} lladdr "00:11:22:33:44:55" nud permanent dev {{ dst_port_1 }} + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on destination port {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";expected_dst_mac="00:11:22:33:44:55";verbose=True' args: diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml index 1e4e9c2ecc4..fe953eacfc8 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_5.yml @@ -4,6 +4,9 @@ shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" become: yes +- pause: + seconds: 3 + - block: - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml index 3839bc5ca2d..9445d648179 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_6.yml @@ -5,6 +5,9 @@ - name: Create ECMP route with next hops on {{ dst_port_1 }} and {{ dst_port_2 }}. 
shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -15,6 +18,9 @@ - name: Add next hop to ECMP route. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -33,6 +39,9 @@ - name: Delete next hop from ECMP route. shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }} or {{ dst_port_2 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}, {{ dst_port_2_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml index c8f7cfae972..92ebb6d1f1e 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_7.yml @@ -5,6 +5,9 @@ - name: Create route with next hop on {{ dst_port_1 }}. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -15,6 +18,9 @@ - name: Add next hops on {{ dst_port_2 }} and {{ dst_port_3 }} to route. shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_2['addr'] }}" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_3['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets with correct Everflow header are received on {{ dst_port_1 }}. 
shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: @@ -33,6 +39,9 @@ - name: Delete one next hop from ECMP route. shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + - pause: + seconds: 3 + - name: Send traffic and verify that packets are not received {{ dst_port_1 }}. shell: ptf --test-dir acstests everflow_tb_test.EverflowTest --platform-dir ptftests --platform remote -t 'asic_type="{{ sonic_asic_type }}";hwsku="{{ sonic_hwsku }}";router_mac="{{ ansible_Ethernet0['macaddress'] }}";src_port="{{ src_port_ptf_id }}";dst_ports="{{ dst_port_1_ptf_id }}";session_src_ip="{{ session_src_ip }}";session_dst_ip="{{ session_dst_ip }}";session_ttl="{{ session_ttl }}";session_dscp="{{ session_dscp }}";verbose=True' args: From a3fb8ea5347db1b853fd65aefb68ea5f31f3a694 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 25 Jun 2019 15:33:44 -0700 Subject: [PATCH 052/218] [EVERFLOW]: Add EVERFLOW policer test with DSCP value/mask (#932) Signed-off-by: Shu0T1an ChenG --- .../files/acstests/everflow_policer_test.py | 149 ++++++++++++++++++ .../tasks/everflow_testbed/get_port_info.yml | 10 +- .../test/tasks/everflow_testbed/run_test.yml | 3 + .../tasks/everflow_testbed/testcase_8.yml | 76 +++++++++ 4 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/test/files/acstests/everflow_policer_test.py create mode 100644 ansible/roles/test/tasks/everflow_testbed/testcase_8.yml diff --git a/ansible/roles/test/files/acstests/everflow_policer_test.py b/ansible/roles/test/files/acstests/everflow_policer_test.py new file mode 
100644 index 00000000000..b09e30a74e5 --- /dev/null +++ b/ansible/roles/test/files/acstests/everflow_policer_test.py @@ -0,0 +1,149 @@ +''' +Description: This file contains the EVERFLOW policer test + +Usage: Examples of how to use: + ptf --test-dir acstests everflow_policer_test.EverflowPolicerTest --platform remote -t 'router_mac="00:02:03:04:05:00";src_port="20";dst_ports="21,22";verbose=True' --relax +''' + + +import ptf +import ptf.packet as scapy +import ptf.dataplane as dataplane +import ptf.testutils as testutils +from ptf.base_tests import BaseTest +from ptf.mask import Mask + +class EverflowPolicerTest(BaseTest): + + GRE_PROTOCOL_NUMBER = 47 + NUM_OF_TOTAL_PACKETS = 200 + + + def __init__(self): + ''' + @summary: constructor + ''' + BaseTest.__init__(self) + self.test_params = testutils.test_params_get() + + + def greFilter(self, pkt_str): + ''' + @summaty: Filter GRE packets + ''' + try: + pkt = scapy.Ether(pkt_str) + + if scapy.IP not in pkt: + return False + + return pkt[scapy.IP].proto == self.GRE_PROTOCOL_NUMBER + except: + return False + + + def setUp(self): + ''' + @summary: Setup the test + ''' + print "" + + self.dataplane = ptf.dataplane_instance + self.hwsku = self.test_params['hwsku'] + self.asic_type = self.test_params['asic_type'] + self.router_mac = self.test_params['router_mac'] + self.session_src_ip = "1.1.1.1" + self.session_dst_ip = "2.2.2.2" + self.session_ttl = 1 + self.session_dscp = 8 + self.src_port = int(self.test_params['src_port']) + self.dst_mirror_ports = [int(p) for p in self.test_params['dst_mirror_ports'].split(",") if p] + self.dst_ports = [int(p) for p in self.test_params['dst_ports'].split(",")] + + self.base_pkt = testutils.simple_tcp_packet( + eth_dst = self.router_mac, + eth_src = self.dataplane.get_mac(0, 0), + ip_src = "20.0.0.1", + ip_dst = "30.0.0.1", + tcp_sport = 0x1234, + tcp_dport = 0x50, + ip_dscp = 9, + ip_ttl = 64) + + def checkOriginalFlow(self): + """ + @summary: Send traffic & check how many original 
packets are received + @return: count: number of original packets received + """ + exp_pkt = self.base_pkt.copy() + exp_pkt['Ethernet'].src = self.router_mac + exp_pkt['IP'].ttl = self.base_pkt['IP'].ttl - 1 + + masked_exp_pkt = Mask(exp_pkt) + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") + + self.dataplane.flush() + + count = 0 + for i in range(0, self.NUM_OF_TOTAL_PACKETS): + testutils.send_packet(self, self.src_port, self.base_pkt) + (rcv_device, rcv_port, rcv_pkt, pkt_time) = testutils.dp_poll(self, timeout=0.1, exp_pkt=masked_exp_pkt) + if rcv_pkt is not None: + count += 1 + elif count == 0: + print "The first original packet is not recieved" + assert False # Fast failure without waiting for full iteration + print "Recieved " + str(count) + " original packets" + return count + + def checkMirroredFlow(self): + """ + @summary: Send traffic & check how many mirrored packets are received + @return: count: number of mirrored packets received + """ + exp_pkt = testutils.simple_gre_packet( + eth_src = self.router_mac, + ip_src = self.session_src_ip, + ip_dst = self.session_dst_ip, + ip_dscp = self.session_dscp, + ip_id = 0, + #ip_flags = 0x10, # need to upgrade ptf version to support it + ip_ttl = self.session_ttl, + inner_frame = self.base_pkt) + + exp_pkt['GRE'].proto = 0x88be + + masked_exp_pkt = Mask(exp_pkt) + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") + masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "flags") + masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + + self.dataplane.flush() + + count = 0 + for i in range(0,self.NUM_OF_TOTAL_PACKETS): + testutils.send_packet(self, self.src_port, self.base_pkt) + (rcv_device, rcv_port, rcv_pkt, pkt_time) = testutils.dp_poll(self, timeout=0.1, exp_pkt=masked_exp_pkt) + if rcv_pkt is not None: + count += 1 + elif count == 0: + print "The first mirrored packet is not recieved" + assert False # Fast failure without waiting for full iteration + print "Received " + str(count) + " mirrored 
packets after rate limiting" + return count + + + def runTest(self): + """ + @summary: Run EVERFLOW Policer Test + """ + + # Send traffic and verify the original traffic is not rate limited + count = self.checkOriginalFlow() + assert count == self.NUM_OF_TOTAL_PACKETS + + testutils.add_filter(self.greFilter) + + # Send traffic and verify the mirroed traffic is rate limited + count = self.checkMirroredFlow() + assert count > 100 and count < self.NUM_OF_TOTAL_PACKETS # cbs = cir = 100 diff --git a/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml b/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml index 1779766979f..01851ded32e 100644 --- a/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml +++ b/ansible/roles/test/tasks/everflow_testbed/get_port_info.yml @@ -5,6 +5,7 @@ set_fact: tor_ports: [] spine_ports: [] + spine_ptf_ports: [] dst_port_1_is_lag_member: "" dst_port_1_ptf_id: "" dst_port_2: "" @@ -21,7 +22,7 @@ when: "'T0' in item.value.name" - name: Print tor ports - debug: msg="{{ tor_ports }}" + debug: msg={{ tor_ports }} - name: Get spine ports set_fact: @@ -30,7 +31,12 @@ when: "'T2' in item.value.name" - name: Print spine ports - debug: msg="{{ spine_ports }}" + debug: msg={{ spine_ports }} + +- name: Define spine PTF ports + set_fact: + spine_ptf_ports: "{{ spine_ptf_ports + [minigraph_port_indices[item] | string] }}" + with_items: "{{ spine_ports }}" - name: Define SRC port variables. set_fact: diff --git a/ansible/roles/test/tasks/everflow_testbed/run_test.yml b/ansible/roles/test/tasks/everflow_testbed/run_test.yml index c0b129b0e56..847a675333d 100644 --- a/ansible/roles/test/tasks/everflow_testbed/run_test.yml +++ b/ansible/roles/test/tasks/everflow_testbed/run_test.yml @@ -69,6 +69,9 @@ - name: Run testcase 7 - ECMP route change (remove next hop used by session). 
include: roles/test/tasks/everflow_testbed/testcase_7.yml + - name: Run testcase 8 - Policer enforced with DSCP value/mask + include: roles/test/tasks/everflow_testbed/testcase_8.yml + always: - name: Remove route to unresolved next hop. shell: vtysh -e "conf t" -e "no ip route {{ unresolved_nexthop_prefix }} {{ dst_port_2 }}" diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml new file mode 100644 index 00000000000..364dfee4dfe --- /dev/null +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml @@ -0,0 +1,76 @@ +# Test case 8 - Policer enforced DSCP value/mask test + +- set_fact: + policer_name: TEST_POLICER + policer_session_name: TEST_POLICER_SESSION + dscp_table_name: EVERFLOW_DSCP + +- name: Create route with next hop {{ dst_port_1 }}. + shell: vtysh -e "conf t" -e "ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + become: yes + +- block: + - name: Create a policer + shell: | + redis-cli -n 4 hmset "POLICER|{{policer_name}}" "meter_type" "packets" "mode" "sr_tcm" "cir" "100" "cbs" "100" "red_packet_action" "drop" + become: yes + + - name: Create a policer enforced mirror session + shell: | + config mirror_session add {{policer_session_name}} {{session_src_ip}} {{session_dst_ip}} {{session_dscp}} {{session_ttl}} --policer {{policer_name}} + become: yes + + - name: Create an ACL table with MIRROR_DSCP type + shell: config acl add table {{dscp_table_name}} "MIRROR_DSCP" --description "EVERFLOW_TEST" + become: yes + + - name: Create a rule with DSCP value and mask + shell: | + redis-cli -n 4 hmset "ACL_RULE|{{dscp_table_name}}|RULE_1" "PRIORITY" "9999" "MIRROR_ACTION" "{{policer_session_name}}" "DSCP" "8/56" + become: yes + + - name: "Start PTF runner" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: EVERFLOW Policer Test + ptf_test_dir: acstests + ptf_test_path: everflow_policer_test.EverflowPolicerTest + ptf_platform: remote + ptf_platform_dir: 
ptftests + ptf_test_params: + - asic_type='{{sonic_asic_type}}' + - hwsku='{{sonic_hwsku}}' + - router_mac='{{ansible_Ethernet0['macaddress']}}' + - src_port='{{src_port_ptf_id}}' + - dst_ports='{{",".join((spine_ptf_ports))}}' + - dst_mirror_ports='{{dst_port_1_ptf_id}}' + ptf_extra_options: "--relax --debug info" + + always: + - name: Remove route + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + ignore_errors: yes + become: yes + + - name: Create a policer + shell: | + redis-cli -n 4 del "POLICER|{{policer_name}}" + ignore_errors: yes + become: yes + + - name: Create a policer enforced mirror session + shell: | + config mirror_session remove {{policer_session_name}} + ignore_errors: yes + become: yes + + - name: Create an ACL table with MIRROR_DSCP type + shell: config acl remove table {{dscp_table_name}} + ignore_errors: yes + become: yes + + - name: Create a rule with DSCP value and mask + shell: | + redis-cli -n 4 del "ACL_RULE|{{dscp_table_name}}|RULE_1" + ignore_errors: yes + become: yes From 22a1721b75d2068b09cd13dd46ce7eb20b7ce6bc Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Fri, 5 Jul 2019 10:37:13 -0700 Subject: [PATCH 053/218] [everflow]: Fix the tearing down procedure order (#988) Signed-off-by: Shu0T1an ChenG --- .../tasks/everflow_testbed/testcase_8.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml index 364dfee4dfe..de87836dc3b 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml @@ -52,25 +52,26 @@ ignore_errors: yes become: yes - - name: Create a policer + - name: Remove the rule with DSCP value and mask shell: | - redis-cli -n 4 del "POLICER|{{policer_name}}" + redis-cli -n 4 del "ACL_RULE|{{dscp_table_name}}|RULE_1" ignore_errors: yes become: yes - - name: 
Create a policer enforced mirror session - shell: | - config mirror_session remove {{policer_session_name}} + - name: Remove the ACL table with MIRROR_DSCP type + shell: config acl remove table {{dscp_table_name}} ignore_errors: yes become: yes - - name: Create an ACL table with MIRROR_DSCP type - shell: config acl remove table {{dscp_table_name}} + - name: Remove the policer enforced mirror session + shell: | + config mirror_session remove {{policer_session_name}} ignore_errors: yes become: yes - - name: Create a rule with DSCP value and mask + - name: Remove policer shell: | - redis-cli -n 4 del "ACL_RULE|{{dscp_table_name}}|RULE_1" + redis-cli -n 4 del "POLICER|{{policer_name}}" ignore_errors: yes become: yes + From 5da3759c0fd911df5e78859b76adbe31da2db009 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 15 Jul 2019 20:35:37 -0700 Subject: [PATCH 054/218] Add test parameter for increasing sniff time (#1011) --- ansible/roles/test/files/ptftests/advanced-reboot.py | 3 ++- ansible/roles/test/tasks/advanced-reboot.yml | 1 + ansible/roles/test/tasks/ptf_runner_reboot.yml | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 71943b390d0..89681ed658e 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -145,6 +145,7 @@ def __init__(self): self.check_param('preboot_files', None, required = False) self.check_param('preboot_oper', None, required = False) self.check_param('allow_vlan_flooding', False, required = False) + self.check_param('sniff_time_incr', 60, required = False) if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None': self.test_params['preboot_oper'] = None @@ -917,7 +918,7 @@ def sniff_in_background(self, wait = None): The native scapy.snif() is used as a background thread, to allow delayed start for the 
send_in_background(). """ if not wait: - wait = self.time_to_listen + 60 + wait = self.time_to_listen + self.test_params['sniff_time_incr'] sniffer_start = datetime.datetime.now() self.log("Sniffer started at %s" % str(sniffer_start)) sniff_filter = "tcp and tcp dst port 5000 and tcp src port 1234 and not icmp" diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index e9531d3793f..04707dd0d32 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -144,6 +144,7 @@ stay_in_target_image: "{{ stay_in_target_image | default('false') | bool }}" cleanup_old_sonic_images: "{{ cleanup_old_sonic_images | default('false') | bool }}" allow_vlan_flooding: "{{ allow_vlan_flooding | default('false') | bool }}" + sniff_time_incr: "{{ sniff_time_incr | default(60) | int }}" - include: advanced_reboot/reboot-image-handle.yml when: new_sonic_image is defined diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 09ac5a61c94..7966f81a31e 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -25,6 +25,7 @@ - preboot_files='{{ preboot_files }}' - preboot_oper='{{ item }}' - allow_vlan_flooding='{{ allow_vlan_flooding }}' + - sniff_time_incr={{ sniff_time_incr }} always: From 54af304a2d85f1a148e93220472457999ffa9522 Mon Sep 17 00:00:00 2001 From: chitra-raghavan <32665166+chitra-raghavan@users.noreply.github.com> Date: Thu, 21 Mar 2019 23:30:55 +0530 Subject: [PATCH 055/218] Add tests for interface naming mode (#716) --- ansible/library/show_interface.py | 166 ++++++++++++++++++ .../roles/test/tasks/iface_naming_mode.yml | 41 +++++ .../test/tasks/iface_naming_mode/add_user.yml | 9 + .../iface_naming_mode/check_userifmode.yml | 25 +++ .../iface_naming_mode_tests.yml | 72 ++++++++ .../iface_naming_mode/interface_config.yml | 79 +++++++++ 
.../test/tasks/iface_naming_mode/show_acl.yml | 19 ++ .../test/tasks/iface_naming_mode/show_arp.yml | 28 +++ .../iface_naming_mode/show_interface.yml | 45 +++++ .../tasks/iface_naming_mode/show_ip_route.yml | 66 +++++++ .../iface_naming_mode/show_pfc_counters.yml | 32 ++++ .../iface_naming_mode/show_portchannel.yml | 24 +++ .../iface_naming_mode/show_queue_counters.yml | 36 ++++ .../vars/iface_naming_vars.yml | 3 + ansible/roles/test/vars/testcases.yml | 8 +- 15 files changed, 652 insertions(+), 1 deletion(-) create mode 100644 ansible/library/show_interface.py create mode 100644 ansible/roles/test/tasks/iface_naming_mode.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/add_user.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/check_userifmode.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/interface_config.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_acl.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_arp.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_interface.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_ip_route.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_pfc_counters.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_portchannel.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/show_queue_counters.yml create mode 100644 ansible/roles/test/tasks/iface_naming_mode/vars/iface_naming_vars.yml diff --git a/ansible/library/show_interface.py b/ansible/library/show_interface.py new file mode 100644 index 00000000000..cf6e2d91dda --- /dev/null +++ b/ansible/library/show_interface.py @@ -0,0 +1,166 @@ +#!/usr/bin/python + +from ansible.module_utils.basic import * +import time +import re + +DOCUMENTATION = ''' +module: show_interface.py +version_added: 2.0.0.2 
+Short_description: Retrieve the show interface status and show interface counter output values +Description: + - Retrieve the show interface status and show interface counter output values + and inserted into ansible_facts + +options: + - command: + Description: Show interface command( counter/status) + Required: True + - interfaces: + Description: Interfaces for which the facts to be gathered. By default It will gather facts for all interfaces + Required: False +''' + +EXAMPLES = ''' + # Get show interface status + - show_interface: comamnd='status' + + # Get show interface status of interface Ethernet0 + - show_interface: comamnd='status' interfaces='Ethernet0' + + # Get show interface counter + - show_interface: comamnd='counter' interface='Ethernet4' + +''' + +RETURN = ''' + ansible_facts: + int_status:{ + "Ethernet0":{ + "name": "Ethernet0" + "speed": "40G" + "alias": "fortyGigE1/1/1" + "oper_state": "down" + "admin_state": "up" + } + } + ansible_facts: + int_counter:{ + "Ethernet0":{ + 'IFACE' : "Ethernet0" + 'STATE' : "U" + 'RX_OK' : "25000" + 'RX_DRP' : "3456" + 'RX_OVR' : "0" + 'TX_OK' : "5843" + 'TX_ERR' : "0" + 'TX_DRP' : "0" + 'TX_OVR' : "0" + +''' + + +class ShowInterfaceModule(object): + def __init__(self): + self.module = AnsibleModule( + argument_spec=dict( + command=dict(required=True, type='str'), + interfaces=dict(required=False, type='list', default=None), + ), + supports_check_mode=False) + self.m_args = self.module.params + self.out = None + self.facts = {} + return + + def run(self): + """ + Main method of the class + """ + if self.m_args['command'] == 'status': self.collect_interface_status() + if self.m_args['command'] == 'counter': self.collect_interface_counter() + self.module.exit_json(ansible_facts=self.facts) + + def collect_interface_status(self): + regex_int = re.compile(r'(\S+)\s+[\d,]+\s+(\w+)\s+(\d+)\s+([\w\/]+)\s+(\w+)\s+(\w+)') + self.int_status = {} + if self.m_args['interfaces'] is not None: + for interface in 
self.m_args['interfaces']: + self.int_status[interface] = {} + command = 'sudo show interface status ' + interface + try: + rc, self.out, err = self.module.run_command(command, executable='/bin/bash', use_unsafe_shell=True) + for line in self.out.split("\n"): + line = line.strip() + if regex_int.match(line): + self.int_status[interface]['name'] = regex_int.match(line).group(1) + self.int_status[interface]['speed'] = regex_int.match(line).group(2) + self.int_status[interface]['alias'] = regex_int.match(line).group(4) + self.int_status[interface]['oper_state'] = regex_int.match(line).group(5) + self.int_status[interface]['admin_state'] = regex_int.match(line).group(6) + self.facts['int_status'] = self.int_status + except Exception as e: + self.module.fail_json(msg=str(e)) + if rc != 0: + self.module.fail_json(msg="Command failed rc=%d, out=%s, err=%s" % (rc, self.out, err)) + else: + try: + rc, self.out, err = self.module.run_command('show interface status', executable='/bin/bash', use_unsafe_shell=True) + for line in self.out.split("\n"): + line = line.strip() + if regex_int.match(line): + interface = regex_int.match(line).group(1) + self.int_status[interface] = {} + self.int_status[interface]['name'] = interface + self.int_status[interface]['speed'] = regex_int.match(line).group(2) + self.int_status[interface]['alias'] = regex_int.match(line).group(4) + self.int_status[interface]['oper_state'] = regex_int.match(line).group(5) + self.int_status[interface]['admin_state'] = regex_int.match(line).group(6) + self.facts['int_status'] = self.int_status + except Exception as e: + self.module.fail_json(msg=str(e)) + if rc != 0: + self.module.fail_json(msg="Command failed rc = %d, out = %s, err = %s" % (rc, self.out, err)) + + return + + def collect_interface_counter(self): + regex_int = re.compile(r'(\S+)\s+(\w)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)') + self.int_counter = {} + try: + rc, self.out, err = 
self.module.run_command('show interface counter', executable='/bin/bash', use_unsafe_shell=True) + for line in self.out.split("\n"): + line = line.strip() + if regex_int.match(line): + interface = regex_int.match(line).group(1) + self.int_counter[interface] = {} + self.int_counter[interface]['IFACE'] = interface + self.int_counter[interface]['STATE'] = regex_int.match(line).group(2) + self.int_counter[interface]['RX_OK'] = regex_int.match(line).group(3) + self.int_counter[interface]['RX_BPS'] = regex_int.match(line).group(4) + self.int_counter[interface]['RX_UTIL'] = regex_int.match(line).group(5) + self.int_counter[interface]['RX_ERR'] = regex_int.match(line).group(6) + self.int_counter[interface]['RX_DRP'] = regex_int.match(line).group(7) + self.int_counter[interface]['RX_OVR'] = regex_int.match(line).group(8) + self.int_counter[interface]['TX_OK'] = regex_int.match(line).group(9) + self.int_counter[interface]['TX_BPS'] = regex_int.match(line).group(10) + self.int_counter[interface]['TX_UTIL'] = regex_int.match(line).group(11) + self.int_counter[interface]['TX_ERR'] = regex_int.match(line).group(12) + self.int_counter[interface]['TX_DRP'] = regex_int.match(line).group(13) + self.int_counter[interface]['TX_OVR'] = regex_int.match(line).group(14) + except Exception as e: + self.module.fail_json(msg=str(e)) + if rc != 0: + self.module.fail_json(msg="Command failed rc=%d, out=%s, err=%s" % (rc, self.out, err)) + self.facts['int_counter'] = self.int_counter + return + + +def main(): + ShowInt = ShowInterfaceModule() + ShowInt.run() + return + +if __name__ == "__main__": + main() + diff --git a/ansible/roles/test/tasks/iface_naming_mode.yml b/ansible/roles/test/tasks/iface_naming_mode.yml new file mode 100644 index 00000000000..3c298a0d9f7 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode.yml @@ -0,0 +1,41 @@ +# Tests for interface_naming_mode feature +- include_vars: roles/test/tasks/iface_naming_mode/vars/iface_naming_vars.yml + +- name: Gathering 
minigraph facts about the device + minigraph_facts: host={{inventory_hostname}} + +- name: find interface name mapping + port_alias: hwsku="{{hwsku}}" + +# Get the default interface names list +- set_fact: + default_interfaces: "{{port_name_map | list}}" + +#Get the configured ports which are up from minigraph facts and get its alias name +- set_fact: + up_ports: "{{minigraph_ports | list}}" + +- set_fact: + upport_alias_list: "{{minigraph_ports.values()| map(attribute='alias') | list }}" + +#Sample Test interface name and its alias +- set_fact: + interface: "{{minigraph_ports | sort | first}}" +- set_fact: + interface_alias: "{{port_name_map[interface]}}" + +############################################################# +######################## START OF TESTS ##################### +############################################################# + +# All tests run for user guest in alias mode as well as in default mode + +- name: Test Interface naming mode feature in alias mode + include: "roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml" + vars: + mode: alias + +- name: Test Interface naming mode feature in default mode + include: "roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml" + vars: + mode: default diff --git a/ansible/roles/test/tasks/iface_naming_mode/add_user.yml b/ansible/roles/test/tasks/iface_naming_mode/add_user.yml new file mode 100644 index 00000000000..02fb3cab990 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/add_user.yml @@ -0,0 +1,9 @@ +- name: create user + user: + name: "{{uname1}}" + groups: sudo + state: present + shell: /bin/bash + +- name: Set password for user + shell: "echo {{uname1}}:{{upasswd1}} | sudo chpasswd" diff --git a/ansible/roles/test/tasks/iface_naming_mode/check_userifmode.yml b/ansible/roles/test/tasks/iface_naming_mode/check_userifmode.yml new file mode 100644 index 00000000000..905b1e7e308 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/check_userifmode.yml @@ -0,0 
+1,25 @@ +#When the interface naming mode is set,it is written as environment variable in bashrc file. +#The device need to be logout and login for the actual environment variable to take effect. +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + + +- name: Extract the "SONIC_CLI_IFACE_MODE" value from bashrc file + shell: "cat /home/{{uname1}}/.bashrc | grep SONIC_CLI_IFACE_MODE" + args: + executable: /bin/bash + register: envout + +#extract the environmental variable and save it in the variable 'ifmode_env' +- set_fact: + ifmode_env: "{{envout.stdout}}" +- set_fact: ifmode="{{ifmode_env.split('=')[1]}}" + +- debug: msg="Interface mode is set to '{{ifmode}}'" + +- command: show interfaces naming_mode + register: naming_mode + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + +- name: check the interface mode is properly set to {{mode}} + assert: {that: "'{{ifmode}}'=='{{mode}}' and '{{naming_mode.stdout}}' == '{{mode}}'"} diff --git a/ansible/roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml b/ansible/roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml new file mode 100644 index 00000000000..cc0f13b622b --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/iface_naming_mode_tests.yml @@ -0,0 +1,72 @@ +- name: Create normal guest user + include: "roles/test/tasks/iface_naming_mode/add_user.yml" + +- name: set interface naming mode to {{mode}} mode + shell : sudo config interface_naming_mode {{mode}} + register: out + failed_when: out.rc != 0 + become_user: '{{uname1}}' + become: yes + +# Check whether the interface mode is set properly in bashrc file +- include: roles/test/tasks/iface_naming_mode/check_userifmode.yml + +- set_fact: + intf: "{{interface_alias if (mode=='alias') else interface}}" + 
+############################################################# +######################## START OF TESTS ##################### +############################################################# + +# All tests run for user guest in alias mode as well as in default mode +#Below set of testcases will run for all topologies +- block: + - name: Test show pfc counters output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_pfc_counters.yml" + + - name: Test show queue counters output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_queue_counters.yml" + + - name: Test show interface status, counter,description,summary output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_interface.yml" + + - name: Test config interface in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/interface_config.yml" + + become_user: '{{uname1}}' + become: yes + +#Test to be run in T1 topology +- block: + - name: Test show arp output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_arp.yml" + + - name: Test show acl output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_acl.yml" + + - name: Test show ip/ipv6 route in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_ip_route.yml" + + when: testbed_type in ['t1'] + become_user: '{{uname1}}' + become: yes + +# Test to be run in t0 topology +- block: + + - name: verify show portchannel interface output in {{mode}} mode + include: "roles/test/tasks/iface_naming_mode/show_portchannel.yml" + + become_user: '{{uname1}}' + become: yes + when: testbed_type in ['t0', 't0-64', 't0-64-32', 't0-116', ] + +- always: + + - name: Remove the user + user: + name: "{{uname1}}" + groups: sudo + state: absent + shell: /bin/bash + remove: yes diff --git a/ansible/roles/test/tasks/iface_naming_mode/interface_config.yml b/ansible/roles/test/tasks/iface_naming_mode/interface_config.yml new file mode 100644 index 00000000000..100b21180de --- 
/dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/interface_config.yml @@ -0,0 +1,79 @@ + +#set the test interface according to default or alias mode +- set_fact: + intf: "{{interface_alias if (mode=='alias') else interface}}" + +- set_fact: + native_speed: "{{port_speed[interface_alias] if (port_speed | length != 0) else iface_speed}}" + +- block: + + - name: shutdown the interface {{intf}} in {{mode}} mode + shell: sudo config interface {{intf}} shutdown + register: out + failed_when: out.rc != 0 + + - pause: seconds=3 + + - name: Get interface status + show_interface: command="status" interfaces={{intf}} + + - pause: seconds=3 + + - name: Check whether the status is down + assert: {that: "'{{int_status[intf]['admin_state']}}' == 'down'"} + + - name: Bringup the interface {{intf}} in {{mode}} mode + shell: sudo config interface {{intf}} startup + register: out + failed_when: out.rc != 0 + + - pause: seconds=3 + + - name: Get interface status + show_interface: command="status" interfaces="{{intf}}" + + - name: Check whether the status is up + assert: {that: "'{{int_status[intf]['admin_state']}}' == 'up'"} + +# check the config interface speed + + - name: configure interface speed to 10G in {{mode}} mode + shell: sudo config interface {{intf}} speed 10000 + register: out + failed_when: out.rc != 0 + + - name: get the interface speed + shell: sudo redis-cli -n 4 HGET "PORT|{{interface}}" 'speed' + register: speed + + - debug: var=speed + + - name: Check whether the speed is set to 10G + assert: {that: "'{{speed.stdout}}' == '10000'"} + + - name: chamge interface speed to native speed and check + shell: sudo config interface {{intf}} speed {{native_speed}} + register: out + failed_when: out.rc != 0 + + - name: get the interface speed + shell: sudo redis-cli -n 4 HGET "PORT|{{interface}}" 'speed' + register: speed + + - name: Check whether the speed is set to native speed + assert: {that: "'{{speed.stdout}}' == '{{native_speed}}'"} + +# As the ansible work in 
non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround , the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + + always: + + - name: set the interface up + shell: sudo config interface {{intf}} startup + + - name: change interface speed to native speed and check + shell: sudo config interface {{intf}} speed {{native_speed}} + diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_acl.yml b/ansible/roles/test/tasks/iface_naming_mode/show_acl.yml new file mode 100644 index 00000000000..5b6837eca4c --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_acl.yml @@ -0,0 +1,19 @@ +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. 
+ +- name: verify show acl table output + shell: show acl table DATAACL + register: acl_table + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + +- debug: var=acl_table.stdout_lines + +- name: check acl table output shows default interface names when mode is default + assert: {that: item in acl_table.stdout} + with_items: minigraph_acls['DataAcl'] + when: mode=='default' and item not in minigraph_portchannels + +- name: check acl table output shows alias interface names when mode is set to alias + assert: {that: " '{{port_name_map[item]}}' in acl_table.stdout"} + with_items: minigraph_acls['DataAcl'] + when: mode=='alias' and item not in minigraph_portchannels diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_arp.yml b/ansible/roles/test/tasks/iface_naming_mode/show_arp.yml new file mode 100644 index 00000000000..1160c90f1a8 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_arp.yml @@ -0,0 +1,28 @@ +- name: get arp facts + switch_arptable: + +- debug: var=arptable['v4'] + +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. 
+ +- name: Get the output of show arp command in {{mode}} mode + shell: show arp + register: arp_output + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + +- debug: var=arp_output + +- name: Check the output shows default interface names corresponding to the arp + assert: + that: + - arp_output.stdout | search("{{item}}.*\s+{{arptable['v4'][item]['interface']}}") + with_items: arptable['v4'] + when: arptable['v4'][item]['interface']!='eth0' and mode=='default' + +- name: Check the output shows alias interface names corresponding to the arp + assert: + that: + - arp_output.stdout | search("{{item}}.*\s+{{port_name_map[arptable['v4'][item]['interface']]}}") + with_items: arptable['v4'] + when: arptable['v4'][item]['interface']!='eth0' and mode =='alias' diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_interface.yml b/ansible/roles/test/tasks/iface_naming_mode/show_interface.yml new file mode 100644 index 00000000000..8afbae4d66f --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_interface.yml @@ -0,0 +1,45 @@ +- block: + + # show interface status + - name: show interface status in {{mode}} mode + show_interface: interfaces={{intf}} command='status' + + - debug: var=int_status + + - name: check proper output is displayed for the given Interface + assert: + that: + - "'{{int_status[intf]['name']}}'=='{{interface}}' and '{{int_status[intf]['alias']}}'=='{{interface_alias}}'" + + #show interface counters + - name: Test interface counters in {{mode}} mode + show_interface: command='counter' + + - name: check counter output in alias mode + assert: {that: item in port_alias} + with_items: int_counter + when: mode=="alias" + + - name: check counter output in default mode + assert: {that: item in default_interfaces} + with_items: int_counter + when: mode=="default" + + # show interface description + + - name: show interface description {{intf}} in {{mode}} mode + shell: show interface description {{intf}} | sed -n '/^ *Eth/ p' + register: 
int_description + + - debug: var=int_description.stdout_lines + + - name: check the description command shows proper output + assert: + that: + - int_description.stdout | search("{{interface}}.*{{interface_alias}}") + + +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_ip_route.yml b/ansible/roles/test/tasks/iface_naming_mode/show_ip_route.yml new file mode 100644 index 00000000000..0319b838355 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_ip_route.yml @@ -0,0 +1,66 @@ +- name: Init variables. + set_fact: + spine_ports: [] + spine_port_alias: [] + +- name: Get spine ports + set_fact: + spine_ports: "{{ spine_ports + [item.key]}}" + with_dict: "{{ minigraph_neighbors }}" + when: "'T2' in item.value.name" + +- name: Get spine ports alias + set_fact: + spine_port_alias: "{{ spine_port_alias + [port_name_map[item.key]]}}" + with_dict: "{{ minigraph_neighbors }}" + when: "'T2' in item.value.name" + +# Test IPv4 routes +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. 
+ +- name: get the show ip route for 192.168.1.1 + shell: show ip route 192.168.1.1 + register: route + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + +- debug: var=route + +- name: check the output shows interface alias names in alias mode + assert: + that: + - route.stdout | search("via {{item}}") + with_items: spine_port_alias + when: mode=='alias' + +- name: check the output shows default interface names in default mdoe + assert: + that: + - route.stdout | search(" via {{item}}") + with_items: spine_ports + when: mode=='default' + +# Test ipv6 routes +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + +- name: get the show ipv6 route for 20c0:a800::/64 + shell: show ipv6 route 20c0:a800::/64 + register: route + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" + +- debug: var=route + +- name: check the output shows interface alias names in alias mode + assert: + that: + - route.stdout | search("via {{item}}") + with_items: spine_port_alias + when: mode =='alias' + +- name: check the output shows default interface names in default mdoe + assert: + that: + - route.stdout | search(" via {{item}}") + with_items: spine_ports + when: mode =='default' diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_pfc_counters.yml b/ansible/roles/test/tasks/iface_naming_mode/show_pfc_counters.yml new file mode 100644 index 00000000000..b7e28d88744 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_pfc_counters.yml @@ -0,0 +1,32 @@ +- block: + - name: get pfc Rx counter values + shell: sudo show pfc counters | sed -n '/Port Rx/,/^$/p' + register: pfc_rx + + - debug: var=pfc_rx.stdout_lines + + - name: get pfc Tx counter values + shell: sudo show pfc counters | sed -n '/Port Tx/,/^$/p' + register: pfc_tx + + - debug: var=pfc_tx.stdout_lines + + - 
name: check if the output shows default interface names in default mode + assert: + that: + - "'{{item}}' in pfc_rx.stdout and '{{item}}' in pfc_tx.stdout " + - "'{{port_name_map[item]}}' not in pfc_rx.stdout and '{{port_name_map[item]}}' not in pfc_tx.stdout" + with_items: default_interfaces + when: mode=='default' + + - name: check if the output shows alias interface names in alias mode + assert: + that: + - "'{{item}}' in pfc_rx.stdout and '{{item}}' in pfc_tx.stdout " + - "'{{port_alias_map[item]}}' not in pfc_rx.stdout and '{{port_alias_map[item]}}' not in pfc_tx.stdout " + with_items: port_alias + when: mode=='alias' +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround,the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_portchannel.yml b/ansible/roles/test/tasks/iface_naming_mode/show_portchannel.yml new file mode 100644 index 00000000000..c09f284bfc2 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_portchannel.yml @@ -0,0 +1,24 @@ +- block: + - name: show interface portchannel + shell: sudo show interfaces portchannel + register: int_po + + - debug: var=int_po.stdout + + - name: check show interface portchannel output shows default interface name in default mode + assert: + that: + - int_po.stdout | search("{{item.key}}\s+LACP\(A\)\(Up\).*{{item.value['members'][0]}}") + with_dict: minigraph_portchannels + when: mode=='default' + + - name: check show interface portchannel output shows alias name in alias mode + assert: + that: + - int_po.stdout | search("{{item.key}}\s+LACP\(A\)\(Up\).*{{port_name_map[item.value['members'][0]]}}") + with_dict: minigraph_portchannels + when: mode=='alias' +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable 
set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. + + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" diff --git a/ansible/roles/test/tasks/iface_naming_mode/show_queue_counters.yml b/ansible/roles/test/tasks/iface_naming_mode/show_queue_counters.yml new file mode 100644 index 00000000000..1aee7fbd0a0 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/show_queue_counters.yml @@ -0,0 +1,36 @@ +- block: + - name: show queue counters {{intf}} + shell: sudo show queue counters {{intf}} | grep "UC\|MC" + register: queue_counter + + - debug: var=queue_counter.stdout_lines + + - name: Check the {{mode}} interface name is present in output when mode is set to {{mode}} + assert: + that: + - queue_counter.stdout | search("{{intf}}\s+[U|M]C{{item}}\s+\d+\s+\d+\s+\d+\s+\d+") + with_sequence: start=0 end=9 + + - name: show queue counters for all interfaces + shell: sudo show queue counters | grep "UC\|MC" + register: queue_counter + + - debug: var=queue_counter.stdout_lines + + - name: Check default interface name is present in output when mode is is set to default + assert: + that: + - queue_counter.stdout | search("{{item}}\s+[UC|MC\d]+\s+\d+\s+\d+\s+\d+\s+\d+") and '{{port_name_map[item]}}' not in queue_counter.stdout + with_items: default_interfaces + when: mode=='default' + + - name: Check alias interface name is present in output when mode is set to alias + assert: + that: + - queue_counter.stdout | search("{{item}}\s+[UC|MC\d]+\s+\d+\s+\d+\s+\d+\s+\d+") and '{{port_alias_map[item]}}' not in queue_counter.stdout + with_items: port_alias + when: mode=='alias' +# As the ansible work in non interactive mode, it doesnt read the environmental varaiable set in bashrc file. Hence as a workaround, the variable is extracted through check_userifmode.yml and manually set the variable 'SONIC_CLI_IFACE_MODE' to take effect. 
+ + environment: + SONIC_CLI_IFACE_MODE: "{{ifmode}}" diff --git a/ansible/roles/test/tasks/iface_naming_mode/vars/iface_naming_vars.yml b/ansible/roles/test/tasks/iface_naming_mode/vars/iface_naming_vars.yml new file mode 100644 index 00000000000..5a2deccdca2 --- /dev/null +++ b/ansible/roles/test/tasks/iface_naming_mode/vars/iface_naming_vars.yml @@ -0,0 +1,3 @@ +#--- +uname1 : guest +upasswd1 : guest diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 4e470062860..bc1c1af85b1 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -232,7 +232,7 @@ testcases: required_vars: ptf_host: testbed_type: - + vxlan_decap: filename: vxlan-decap.yml topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116] @@ -243,3 +243,9 @@ testcases: filename: pfc_asym.yml topologies: [t0] + iface_mode: + filename: iface_naming_mode.yml + topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, ptf32, ptf64] + required_vars: + testbed_type: + From b15ac7971677136427884e524f650c13030b4684 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 16 Jul 2019 14:04:40 -0700 Subject: [PATCH 056/218] [warm reboot] always save the configuration (#1013) When test failed due to dataplane disruption issue, config save would be skipped and leaving the device in vulnerable state. Move config save to the always block. Signed-off-by: Ying Xie --- ansible/roles/test/tasks/advanced-reboot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 04707dd0d32..65e6ab500d7 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -158,6 +158,7 @@ - include: ptf_runner_reboot.yml with_items: "{{ preboot_list }}" + always: # When new image is defined, test removed /host/config_db.json # before warm rebooting. 
So after the device boots up, it will # miss /etc/sonic/config_db.json. It is not an issue for the @@ -178,7 +179,6 @@ - fail: msg="/etc/sonic/config_db.json is missing" when: not stat_result.stat.exists - always: - name: Remove existing ip from ptf host script: roles/test/files/helpers/remove_ip.sh delegate_to: "{{ ptf_host }}" From 674c462cfb4b485143a19b69c3d6b200003be74f Mon Sep 17 00:00:00 2001 From: chitra-raghavan <32665166+chitra-raghavan@users.noreply.github.com> Date: Wed, 17 Jul 2019 21:52:46 +0530 Subject: [PATCH 057/218] [ecn]: Modified the ecn profiles in script to LOSSLESS as all platforms doesnt have LOSSY profile (#747) --- ansible/roles/test/tasks/ecn_wred.yml | 9 ++++----- ansible/roles/test/tasks/ecn_wred_worker.yml | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/ecn_wred.yml b/ansible/roles/test/tasks/ecn_wred.yml index d7800b4d09d..309c9f46829 100644 --- a/ansible/roles/test/tasks/ecn_wred.yml +++ b/ansible/roles/test/tasks/ecn_wred.yml @@ -11,9 +11,9 @@ test_wred_values="[ '491520', '516096', '430080' ]" tags: always - # Read and store original rmin value for AZURE_LOSSY + # Read and store original rmin value for AZURE_LOSSLESS - name: Get red_min_threshold - shell: ecnconfig -l | grep -A20 AZURE_LOSSY | grep red_min_threshold | awk '{print $2}' + shell: ecnconfig -l | grep -A20 AZURE_LOSSLESS | grep red_min_threshold | awk '{print $2}' register: wred_value failed_when: wred_value.rc != 0 @@ -42,8 +42,7 @@ always: - name: Restore original value + shell: ecnconfig -p AZURE_LOSSLESS -rmin {{ red_min_threshold }} become: yes - shell: ecnconfig -p AZURE_LOSSY -rmin {{ red_min_threshold }} register: ecn_restore - failed_when: ecn_restore.rc != 0 - + failed_when: ecn_restore.rc != 0 \ No newline at end of file diff --git a/ansible/roles/test/tasks/ecn_wred_worker.yml b/ansible/roles/test/tasks/ecn_wred_worker.yml index 856a75ff0b1..f7f48ac0790 100644 --- a/ansible/roles/test/tasks/ecn_wred_worker.yml +++ 
b/ansible/roles/test/tasks/ecn_wred_worker.yml @@ -3,8 +3,8 @@ # Set value... - name: Set WRED value {{ item }} + shell: ecnconfig -p AZURE_LOSSLESS -rmin {{ item }} become: yes - shell: ecnconfig -p AZURE_LOSSY -rmin {{ item }} register: rc failed_when: rc.rc != 0 From 703c350f0b665295c989455bca6812aab2299146 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Wed, 30 Jan 2019 15:25:57 -0800 Subject: [PATCH 058/218] [testbed-cli] Fix and enhance usage statement (#795) --- ansible/testbed-cli.sh | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index c242882145b..9bff78f067a 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -5,34 +5,40 @@ set -e function usage { echo "testbed-cli. Interface to testbeds" - echo "Usage :" - echo " $0 [options] { start-vms | stop-vms } server-name vault-password-file" - echo " $0 [options] { add-topo | remove-topo | renumber-topo | connect-topo } topo-name vault-password-file" - echo " $0 [options] { refresh-dut } topo-name vault-password-file" - echo " $0 [options] { connect-vms | disconnect-vms } topo-name vault-password-file" - echo " $0 [options] { config-vm } topo-name vm-name vault-password-file" - echo " $0 [options] { gen-mg | deploy-mg | test-mg } topo-name inventory vault-password-file" + echo "Usage:" + echo " $0 [options] (start-vms | stop-vms) " + echo " $0 [options] (add-topo | remove-topo | renumber-topo | connect-topo) " + echo " $0 [options] refresh-dut " + echo " $0 [options] (connect-vms | disconnect-vms) " + echo " $0 [options] config-vm " + echo " $0 [options] (gen-mg | deploy-mg | test-mg) " echo - echo "Options :" - echo " -t tbfile : testbed csv file name (default testbed.csv)" - echo " -m vmfile : virtual machine file name (default veos)" + echo "Options:" + echo " -t : testbed CSV file name (default: 'testbed.csv')" + echo " -m : virtual machine file name (default: 'veos')" + echo + echo 
"Positional Arguments:" + echo " : Hostname of server on which to start VMs" + echo " : Path to file containing Ansible Vault password" + echo " : Name of the target topology" + echo " : Name of the Ansible inventory containing the DUT" echo echo "To start VMs on a server: $0 start-vms 'server-name' ~/.password" echo "To restart a subset of VMs:" - echo " $0 start-vms server-name vault-password-fix -e respin_vms=[vm list]" - echo " vm list is separated by comma and shouldn't have space in the list." - echo " e.g. respin_vms=[VM0310,VM0330]" + echo " $0 start-vms server-name vault-password-file -e respin_vms=[vm_list]" + echo " vm_list is separated by comma and shouldn't have space in the list." + echo " e.g., respin_vms=[VM0310,VM0330]" echo "To stop VMs on a server: $0 stop-vms 'server-name' ~/.password" echo "To deploy a topology on a server: $0 add-topo 'topo-name' ~/.password" echo "To remove a topology on a server: $0 remove-topo 'topo-name' ~/.password" - echo "To renumber a topology on a server: $0 renumber-topo 'topo-name' ~/.password" , where topo-name is target topology + echo "To renumber a topology on a server: $0 renumber-topo 'topo-name' ~/.password" echo "To connect a topology: $0 connect-topo 'topo-name' ~/.password" echo "To refresh DUT in a topology: $0 refresh-dut 'topo-name' ~/.password" echo "To configure a VM on a server: $0 config-vm 'topo-name' 'vm-name' ~/.password" - echo "To generate minigraph for DUT in a topology: $0 gen-mg 'topo-name' ~/.password" - echo "To deploy minigraph to DUT in a topology: $0 deploy-mg 'topo-name' ~/.password" + echo "To generate minigraph for DUT in a topology: $0 gen-mg 'topo-name' 'inventory' ~/.password" + echo "To deploy minigraph to DUT in a topology: $0 deploy-mg 'topo-name' 'inventory' ~/.password" echo - echo "You should define your topology in testbed csv file" + echo "You should define your topology in testbed CSV file" echo exit } From 1ddd9e0d3171d72e40b125ad37c26571b6e8709e Mon Sep 17 00:00:00 2001 
From: Xin Wang Date: Fri, 5 Apr 2019 10:27:06 +0800 Subject: [PATCH 059/218] Remove duplicated case branches in testbed-cli.sh (#854) Signed-off-by: Xin Wang --- ansible/testbed-cli.sh | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index 9bff78f067a..21be901cd5d 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -289,16 +289,6 @@ case "${subcmd}" in ;; deploy-mg) deploy_minigraph $@ ;; - connect-vms) connect_vms $@ - ;; - disconnect-vms) disconnect_vms $@ - ;; - config-vm) config_vm $@ - ;; - gen-mg) generate_minigraph $@ - ;; - deploy-mg) deploy_minigraph $@ - ;; test-mg) test_minigraph $@ ;; *) usage From 4aec75a2e7000e9737a9a88c3b78a42d17e3a457 Mon Sep 17 00:00:00 2001 From: Mykola F <37578614+mykolaf@users.noreply.github.com> Date: Mon, 27 May 2019 14:58:34 +0300 Subject: [PATCH 060/218] [testbed-cli] flush ip neighbor after add_topo (#881) Change-Id: Ie9b461efee2fa458c53a28eb8fac1c8f5043bc96 Signed-off-by: Mykola Faryma --- ansible/testbed-cli.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index 21be901cd5d..e2e407e1026 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -113,6 +113,9 @@ function add_topo ansible-playbook fanout_connect.yml -i $vmfile --limit "$server" --vault-password-file="${passwd}" -e "dut=$dut" $@ + # Delete the obsoleted arp entry for the PTF IP + ip neighbor flush $ptf_ip + echo Done } From a0608f25fd716f0a484bd450a2cf883f641000ac Mon Sep 17 00:00:00 2001 From: Guohan Lu Date: Wed, 30 Jan 2019 04:07:21 +0000 Subject: [PATCH 061/218] rename eos login/password variable name for clarity Signed-off-by: Guohan Lu --- ansible/group_vars/eos/creds.yml | 4 ---- ansible/roles/eos/tasks/main.yml | 3 +++ ansible/roles/vm_set/tasks/start_vm.yml | 20 ++++++++++---------- ansible/roles/vm_set/vars/main.yml | 10 +++++----- 4 files changed, 18 insertions(+), 19 deletions(-) delete mode 100644 
ansible/group_vars/eos/creds.yml diff --git a/ansible/group_vars/eos/creds.yml b/ansible/group_vars/eos/creds.yml deleted file mode 100644 index 3cd1daa7d2c..00000000000 --- a/ansible/group_vars/eos/creds.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -ansible_user: use_own_value -ansible_password: use_own_value - diff --git a/ansible/roles/eos/tasks/main.yml b/ansible/roles/eos/tasks/main.yml index b3e432749c9..e3c64d6f805 100644 --- a/ansible/roles/eos/tasks/main.yml +++ b/ansible/roles/eos/tasks/main.yml @@ -1,3 +1,6 @@ +- name: Set ansible login user name and password + set_fact: ansible_user="root" ansible_password={{ eos_root_password }} + - name: Load topo variables include_vars: "vars/topo_{{ topo }}.yml" diff --git a/ansible/roles/vm_set/tasks/start_vm.yml b/ansible/roles/vm_set/tasks/start_vm.yml index 2e317f0c5f5..f5cb2a7702a 100644 --- a/ansible/roles/vm_set/tasks/start_vm.yml +++ b/ansible/roles/vm_set/tasks/start_vm.yml @@ -50,14 +50,14 @@ - name: Wait until vm {{ vm_name }} is loaded kickstart: telnet_port={{ serial_port }} - login={{ login }} - password={{ password }} + login={{ eos_default_login }} + password={{ eos_default_password }} hostname={{ hostname }} mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} - new_login={{ new_login }} - new_password={{ new_password }} - new_root_password={{ new_root_password }} + new_login={{ eos_login }} + new_password={{ eos_password }} + new_root_password={{ eos_root_password }} register: kickstart_output when: vm_name not in vm_list_running.list_vms or vm_name in respin_vms @@ -77,14 +77,14 @@ - name: Wait until vm {{ vm_name }} is loaded kickstart: telnet_port={{ serial_port }} - login={{ login }} - password={{ password }} + login={{ eos_default_login }} + password={{ eos_default_password }} hostname={{ hostname }} mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} - new_login={{ new_login }} - new_password={{ new_password }} 
- new_root_password={{ new_root_password }} + new_login={{ eos_login }} + new_password={{ eos_password }} + new_root_password={{ eos_root_password }} register: kickstart_output_final when: vm_name not in vm_list_running.list_vms and kickstart_output.kickstart_code != 0 diff --git a/ansible/roles/vm_set/vars/main.yml b/ansible/roles/vm_set/vars/main.yml index 344784ecec7..727f59e5a38 100644 --- a/ansible/roles/vm_set/vars/main.yml +++ b/ansible/roles/vm_set/vars/main.yml @@ -1,8 +1,8 @@ -login: "admin" -password: "" -new_login: admin -new_password: 123456 -new_root_password: 123456 +eos_default_login: "admin" +eos_default_password: "" +eos_login: admin +eos_password: 123456 +eos_root_password: 123456 sonic_login: "admin" sonic_passwords: From 5496ab7ba4b8ec2ede00a1cc412c0ca32e9af81c Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 9 Jul 2019 22:36:22 +0800 Subject: [PATCH 062/218] [vm_set] Reduce the testbed-cli.sh start-vms time from 3 hours to 20 minutes (#962) * [vm_set] Improve the start-vms performance The original approach starts and configures the VMs sequentially. It takes more than 3 hours to start 32 virtual machines. This change is to start all the VMs, then configure them one by one. With this change, starting 32 VMs needs around 30-40 minutes. Another change in this commit is to configure 'autostart' for VMs so that the VMs will automatically start running after host server is rebooted. 
Signed-off-by: Xin Wang * Add batch_size support, make autostart optional --- ansible/roles/vm_set/library/kickstart.py | 32 ++++++++-- ansible/roles/vm_set/tasks/kickstart_vm.yml | 68 +++++++++++++++++++++ ansible/roles/vm_set/tasks/start.yml | 30 +++++++++ ansible/roles/vm_set/tasks/start_vm.yml | 55 ++++------------- ansible/testbed-cli.sh | 4 ++ 5 files changed, 142 insertions(+), 47 deletions(-) create mode 100644 ansible/roles/vm_set/tasks/kickstart_vm.yml diff --git a/ansible/roles/vm_set/library/kickstart.py b/ansible/roles/vm_set/library/kickstart.py index a15a2dcdcca..8d1dbec4235 100644 --- a/ansible/roles/vm_set/library/kickstart.py +++ b/ansible/roles/vm_set/library/kickstart.py @@ -36,6 +36,14 @@ class EMatchNotFound(Exception): pass +class ELoginPromptNotFound(Exception): + pass + + +class EWrongDefaultPassword(Exception): + pass + + class ENotInEnabled(Exception): pass @@ -78,10 +86,22 @@ def pair(self, action, wait_for, timeout): return index def login(self, user, password): - self.pair('\r', [r'login:'], 240) + try: + self.d.debug('## Getting the login prompt') + self.pair('\r', [r'login:'], 240) + except EMatchNotFound: + self.d.debug('No login prompt is found') + raise ELoginPromptNotFound + + self.d.debug('## Getting the password prompt') index_password = self.pair(user, [r'assword:', r'>'], 20) if index_password == 0: - self.pair(password, [r'>'], 10) + try: + self.d.debug('## Inputing password') + self.pair(password, [r'>'], 10) + except EMatchNotFound: + self.d.debug('The original password "%s" is not working' % password) + raise EWrongDefaultPassword return @@ -179,10 +199,14 @@ def main(): try: result = core(module) + except ELoginPromptNotFound: + result = {'kickstart_code': -1, 'changed': False, 'msg': 'Login prompt not found'} + except EWrongDefaultPassword: + result = {'kickstart_code': 0, 'changed': False, 'msg': 'Wrong default password, kickstart of VM has been done'} except EOFError: - result = {'kickstart_code': -1, 'changed': 
False, 'msg': 'EOF during the chat'} + result = {'kickstart_code': -2, 'changed': False, 'msg': 'EOF during the chat'} except EMatchNotFound: - result = {'kickstart_code': -1, 'changed': False, 'msg': "Match for output isn't found"} + result = {'kickstart_code': -3, 'changed': False, 'msg': "Match for output isn't found"} except ENotInEnabled: module.fail_json(msg='Not in enabled mode') except Exception, e: diff --git a/ansible/roles/vm_set/tasks/kickstart_vm.yml b/ansible/roles/vm_set/tasks/kickstart_vm.yml new file mode 100644 index 00000000000..34542c8aa6c --- /dev/null +++ b/ansible/roles/vm_set/tasks/kickstart_vm.yml @@ -0,0 +1,68 @@ + +- set_fact: + respin_vms: [] + when: respin_vms is not defined + +- set_fact: + skip_this_vm: True + +- set_fact: + skip_this_vm: False + when: vm_name not in vm_list_running.list_vms or vm_name in respin_vms + +- block: + - name: Wait until vm {{ vm_name }} is loaded + kickstart: telnet_port={{ serial_port }} + login={{ eos_default_login }} + password={{ eos_default_password }} + hostname={{ hostname }} + mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" + mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} + new_login={{ eos_login }} + new_password={{ eos_password }} + new_root_password={{ eos_root_password }} + register: kickstart_output + until: '"kickstart_code" in kickstart_output and kickstart_output.kickstart_code != -1' + retries: 5 + delay: 10 + + - name: Destroy vm {{ vm_name }} if it hangs + virt: name={{ vm_name }} + command=destroy + uri=qemu:///system + when: kickstart_output.kickstart_code != 0 + become: yes + + - name: Start vm again {{ vm_name }} + virt: name={{ vm_name }} + state=running + uri=qemu:///system + when: kickstart_output.kickstart_code != 0 + become: yes + + - name: Wait until vm {{ vm_name }} is loaded + kickstart: telnet_port={{ serial_port }} + login={{ eos_default_login }} + password={{ eos_default_password }} + hostname={{ hostname }} + mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" + 
mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} + new_login={{ eos_login }} + new_password={{ eos_password }} + new_root_password={{ eos_root_password }} + register: kickstart_output_final + until: '"kickstart_code" in kickstart_output_final and kickstart_output_final.kickstart_code != -1' + retries: 5 + delay: 10 + when: kickstart_output.kickstart_code != 0 + + - name: Fail if kickstart gives error again vm {{ vm_name }} + fail: msg="Two attempts to start vm weren't successful" + when: '"kickstart_code" in kickstart_output_final and kickstart_output_final.kickstart_code != 0' + + - name: Set VM to autostart + command: "virsh autostart {{ vm_name }}" + become: yes + when: autostart|bool == true + + when: not skip_this_vm diff --git a/ansible/roles/vm_set/tasks/start.yml b/ansible/roles/vm_set/tasks/start.yml index defb28a469e..120016111f8 100644 --- a/ansible/roles/vm_set/tasks/start.yml +++ b/ansible/roles/vm_set/tasks/start.yml @@ -36,6 +36,21 @@ fp_mtu: "{{ fp_mtu_size }}" max_fp_num: "{{ max_fp_num }}" +- name: Default autostart to no when it is not defined + set_fact: + autostart: no + when: autostart is not defined + +- name: Default batch_size to 1 when it is not defined + set_fact: + batch_size: "{{ VM_hosts|length }}" + when: batch_size is not defined + +- name: Default wait interval to 0 if it is not defined + set_fact: + interval: 0 + when: interval is not defined + - name: Start VMs include: start_vm.yml vars: @@ -50,3 +65,18 @@ port1_bridge: "br-{{ vm_name }}-back" port1_tap: "{{ vm_name }}-back" with_items: "{{ VM_hosts }}" + +- name: Kickstart VMs + include: kickstart_vm.yml + vars: + vm_name: "{{ item }}" + hostname: "{{ vm_name }}" + mgmt_ip_address: "{{ hostvars[vm_name]['ansible_host'] }}" + serial_port: "{{ vm_console_base|int + vm_name[4:]|int }}" + src_disk_image: "{{ root_path }}/images/{{ hdd_image_filename }}" + disk_image: "{{ root_path }}/disks/{{ vm_name }}_hdd.vmdk" + cdrom_image: "{{ root_path }}/images/{{ cd_image_filename }}" + mgmt_tap: 
"{{ vm_name }}-m" + port1_bridge: "br-{{ vm_name }}-back" + port1_tap: "{{ vm_name }}-back" + with_items: "{{ VM_hosts }}" diff --git a/ansible/roles/vm_set/tasks/start_vm.yml b/ansible/roles/vm_set/tasks/start_vm.yml index f5cb2a7702a..09bd4e35913 100644 --- a/ansible/roles/vm_set/tasks/start_vm.yml +++ b/ansible/roles/vm_set/tasks/start_vm.yml @@ -48,48 +48,17 @@ when: vm_name not in vm_list_running.list_vms or vm_name in respin_vms become: yes -- name: Wait until vm {{ vm_name }} is loaded - kickstart: telnet_port={{ serial_port }} - login={{ eos_default_login }} - password={{ eos_default_password }} - hostname={{ hostname }} - mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" - mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} - new_login={{ eos_login }} - new_password={{ eos_password }} - new_root_password={{ eos_root_password }} - register: kickstart_output - when: vm_name not in vm_list_running.list_vms or vm_name in respin_vms - -- name: Destroy vm {{ vm_name }} if it hangs - virt: name={{ vm_name }} - command=destroy - uri=qemu:///system - when: vm_name not in vm_list_running.list_vms and kickstart_output.kickstart_code != 0 - become: yes - -- name: Start vm again {{ vm_name }} - virt: name={{ vm_name }} - state=running - uri=qemu:///system - when: vm_name not in vm_list_running.list_vms and kickstart_output.kickstart_code != 0 - become: yes - -- name: Wait until vm {{ vm_name }} is loaded - kickstart: telnet_port={{ serial_port }} - login={{ eos_default_login }} - password={{ eos_default_password }} - hostname={{ hostname }} - mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" - mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} - new_login={{ eos_login }} - new_password={{ eos_password }} - new_root_password={{ eos_root_password }} - register: kickstart_output_final - when: vm_name not in vm_list_running.list_vms and kickstart_output.kickstart_code != 0 - -- name: Fail if kickstart gives error again vm {{ vm_name }} - fail: msg="Two attempts to start vm weren't 
succesfull" - when: '"skipped" in kickstart_output_final and "kickstart_code" in kickstart_output_final and kickstart_output_final.kickstart_code != 0' +# Some testbed may have issue of starting multiple VMs in parallel, this pause is a workaround for this issue +# A better solution should have been used. But the current used ansible v2.0 has issue with nested loops: +# https://github.com/ansible/ansible/issues/14146 So, we can only use this simple workaround for the parallel +# VM starting issue. +- name: Find out VM index + set_fact: + vm_index: "{{ VM_hosts.index(vm_name)|int + 1 }}" +- name: "Pause after started every {{ batch_size }} VMs" + pause: seconds="{{ interval }}" + when: + - "{{ vm_index }} % {{ batch_size }} == 0" + - "{{ interval }} > 0" diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index e2e407e1026..a64f94abfb8 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -28,6 +28,10 @@ function usage echo " $0 start-vms server-name vault-password-file -e respin_vms=[vm_list]" echo " vm_list is separated by comma and shouldn't have space in the list." 
echo " e.g., respin_vms=[VM0310,VM0330]" + echo "To pause some time after triggered starting of a batch of VMs:" + echo " $0 start-vms server-name vault-password-file -e batch_size=2 -e interval=60" + echo "To enable autostart of VMs:" + echo " $0 start-vms server-name vault-password-file -e autostart=yes" echo "To stop VMs on a server: $0 stop-vms 'server-name' ~/.password" echo "To deploy a topology on a server: $0 add-topo 'topo-name' ~/.password" echo "To remove a topology on a server: $0 remove-topo 'topo-name' ~/.password" From 993d5551820dcba19b926cc45dd6dfab8818e4d3 Mon Sep 17 00:00:00 2001 From: dawnbeauty <18810562248@163.com> Date: Thu, 18 Jul 2019 12:07:25 -0500 Subject: [PATCH 063/218] [ansible-plugin] back port stdout callback plugin 'yaml' from ansible2.5 (#1005) * Adding the abblity to use yaml plugin with stdout content Signed-off-by: Zhiqian Wu --- ansible/ansible.cfg | 2 +- ansible/plugins/callback/yaml.py | 123 +++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 ansible/plugins/callback/yaml.py diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index deebc0dd08c..990331eb5b9 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -115,7 +115,7 @@ deprecation_warnings = False # set plugin path directories here, separate with colons action_plugins = plugins/action -# callback_plugins = /usr/share/ansible_plugins/callback_plugins +callback_plugins = plugins/callback connection_plugins = plugins/connection # lookup_plugins = /usr/share/ansible_plugins/lookup_plugins # vars_plugins = /usr/share/ansible_plugins/vars_plugins diff --git a/ansible/plugins/callback/yaml.py b/ansible/plugins/callback/yaml.py new file mode 100644 index 00000000000..bcc94cdf8de --- /dev/null +++ b/ansible/plugins/callback/yaml.py @@ -0,0 +1,123 @@ +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +DOCUMENTATION = ''' + callback: yaml + type: stdout + short_description: 
yaml-ized Ansible screen output + version_added: 2.5 + description: + - Ansible output that can be quite a bit easier to read than the + default JSON formatting. + extends_documentation_fragment: + - default_callback + requirements: + - set as stdout in configuration +''' + +import yaml +import json +import re +import string +import sys +from ansible.plugins.callback import CallbackBase, strip_internal_keys +from ansible.plugins.callback.default import CallbackModule as Default + +# simple workaroud for using yaml callback plugin +from ansible.vars.unsafe_proxy import AnsibleUnsafeText +represent_unicode = yaml.representer.SafeRepresenter.represent_unicode +from ansible.parsing.yaml.dumper import AnsibleDumper +AnsibleDumper.add_representer( + AnsibleUnsafeText, + represent_unicode, +) + +# from http://stackoverflow.com/a/15423007/115478 +def should_use_block(value): + """Returns true if string should be in block format""" + for c in u"\u000a\u000d\u001c\u001d\u001e\u0085\u2028\u2029": + if c in value: + return True + return False + + +def my_represent_scalar(self, tag, value, style=None): + """Uses block style for multi-line strings""" + if style is None: + if should_use_block(value): + style = '|' + # we care more about readable than accuracy, so... + # ...no trailing space + value = value.rstrip() + # ...and non-printable characters + value = ''.join(x for x in value if x in string.printable) + # ...tabs prevent blocks from expanding + value = value.expandtabs() + # ...and odd bits of whitespace + value = re.sub(r'[\x0b\x0c\r]', '', value) + # ...as does trailing space + value = re.sub(r' +\n', '\n', value) + else: + style = self.default_style + node = yaml.representer.ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + +class CallbackModule(Default): + + """ + Variation of the Default output which uses nicely readable YAML instead + of JSON for printing results. 
+ """ + + CALLBACK_VERSION = 2.0 + CALLBACK_TYPE = 'stdout' + CALLBACK_NAME = 'yaml' + + def __init__(self): + super(CallbackModule, self).__init__() + yaml.representer.BaseRepresenter.represent_scalar = my_represent_scalar + + def _dump_results(self, result, indent=None, sort_keys=True, keep_invocation=False): + if result.get('_ansible_no_log', False): + return json.dumps(dict(censored="the output has been hidden due to the fact that 'no_log: true' was specified for this result")) + + # All result keys stating with _ansible_ are internal, so remove them from the result before we output anything. + abridged_result = strip_internal_keys(result) + + # remove invocation unless specifically wanting it + if not keep_invocation and self._display.verbosity < 3 and 'invocation' in result: + del abridged_result['invocation'] + + # remove diff information from screen output + if self._display.verbosity < 3 and 'diff' in result: + del abridged_result['diff'] + + # remove exception from screen output + if 'exception' in abridged_result: + del abridged_result['exception'] + + dumped = '' + + # put changed and skipped into a header line + if 'changed' in abridged_result: + dumped += 'changed=' + str(abridged_result['changed']).lower() + ' ' + del abridged_result['changed'] + + if 'skipped' in abridged_result: + dumped += 'skipped=' + str(abridged_result['skipped']).lower() + ' ' + del abridged_result['skipped'] + + # if we already have stdout, we don't need stdout_lines + if 'stdout' in abridged_result and 'stdout_lines' in abridged_result: + abridged_result['stdout_lines'] = '' + + if abridged_result: + dumped += '\n' + dumped += yaml.dump(abridged_result, Dumper=AnsibleDumper, allow_unicode=True, width=1000, default_flow_style=False) + + # indent by a couple of spaces + dumped = '\n '.join(dumped.split('\n')).rstrip() + return dumped From b2a215611c25a0422077ca7573b48ab1479b120b Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 18 Jul 2019 13:12:26 -0700 Subject: [PATCH 
064/218] [fast/warm reboot] check the existence of correct file as stated (#1018) The check is to gate removing a line in known_hosts file, so the check needs to be checking /root/.ssh/known_hosts. Signed-off-by: Ying Xie --- ansible/roles/test/tasks/advanced-reboot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 65e6ab500d7..c1ff572a9bb 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -75,7 +75,7 @@ delegate_to: "{{ ptf_host }}" - name: Check that file /root/.ssh/known_hosts exists - stat: path=/etc/shorewall/rules + stat: path=/root/.ssh/known_hosts delegate_to: "{{ ptf_host }}" register: known_hosts From d9afea5919529f9f70070c12db0c7e4958588941 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 18 Jul 2019 13:13:47 -0700 Subject: [PATCH 065/218] [sonic_upgrade] remove none-current and non-next images (#1020) As the hard drive permits, we keep a few history images in the past so that we could easily go back to them. Recent test failure shows a downside of that decision. When a test failed and leave an installed image in broken state. We likely restore the system by booting into another working image. However, if the broken image is not removed before installation happens again, because the target image exists, the installation could be skipped or not fixing the existing issue. So when we boot into the image again, the device is still in broken state. Removing all non-current and non-next images give the DUT a better chance to start a clean test. 
Signed-off-by: Ying Xie --- .../library/reduce_and_add_sonic_images.py | 39 +------------------ 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/ansible/library/reduce_and_add_sonic_images.py b/ansible/library/reduce_and_add_sonic_images.py index baf99702c89..83924a7a9a9 100644 --- a/ansible/library/reduce_and_add_sonic_images.py +++ b/ansible/library/reduce_and_add_sonic_images.py @@ -33,24 +33,6 @@ def exec_command(module, cmd, ignore_error=False, msg="executing command"): return out -def get_sonic_image_removal_candidates(module): - keep = set() - images = set() - - out = exec_command(module, cmd="sonic_installer list", - msg="listing sonic images") - - lines = out.split('\n') - for line in lines: - line = line.strip() - if line.startswith("Current:") or line.startswith("Next:"): - keep.add(line.split()[1].strip()) - elif line != "Available:" and len(line) > 0: - images.add(line) - - return (images - keep) - - def get_disk_free_size(module, partition): out = exec_command(module, cmd="df -BM --output=avail %s" % partition, msg="checking disk available size") @@ -59,27 +41,8 @@ def get_disk_free_size(module, partition): return avail -def get_disk_used_percent(module, partition): - out = exec_command(module, cmd="df -BM --output=pcent %s" % partition, - msg="checking disk available percent") - pcent = int(out.split('\n')[1][:-1]) - - return pcent - - def reduce_installed_sonic_images(module, disk_used_pcent): - images = get_sonic_image_removal_candidates(module) - - while len(images) > 0: - pcent = get_disk_used_percent(module, "/host") - if pcent < disk_used_pcent: - break - # Randomly choose an old image to remove. On a system with - # developer built images and offical build images mix-installed - # it is hard to compare image tag to find 'oldest' image. 
- img = images.pop() - exec_command(module, cmd="sonic_installer remove %s -y" % img, - ignore_error=True) + exec_command(module, cmd="sonic_installer cleanup -y", ignore_error=True) def install_new_sonic_image(module, new_image_url): From f218801c9442cf53eb9c9ee125f782d305dad40a Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Fri, 19 Jul 2019 09:00:32 -0700 Subject: [PATCH 066/218] [minigraph] allow generating minigraph without data plane acl defined (#1019) * [minigraph] allow generating minigraph without data plane acl defined Signed-off-by: Ying Xie * Change the default behavior to enable data plane acl --- ansible/templates/minigraph_dpg.j2 | 2 ++ ansible/testbed-cli.sh | 43 +++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/ansible/templates/minigraph_dpg.j2 b/ansible/templates/minigraph_dpg.j2 index 49a1de0a0b7..266019087af 100644 --- a/ansible/templates/minigraph_dpg.j2 +++ b/ansible/templates/minigraph_dpg.j2 @@ -117,6 +117,7 @@ ssh-only SSH +{%- if enable_data_plane_acl|default('true')|bool %} {%- for index in range(vms_number) %} @@ -133,6 +134,7 @@ PortChannel{{ ((index+1) |string).zfill(4) }}{% if not loop.last %};{% endif %} DataAcl DataPlane +{% endif -%} diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index a64f94abfb8..0979dd381c0 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -41,6 +41,10 @@ function usage echo "To configure a VM on a server: $0 config-vm 'topo-name' 'vm-name' ~/.password" echo "To generate minigraph for DUT in a topology: $0 gen-mg 'topo-name' 'inventory' ~/.password" echo "To deploy minigraph to DUT in a topology: $0 deploy-mg 'topo-name' 'inventory' ~/.password" + echo " gen-mg, deploy-mg, test-mg supports enabling/disabling data ACL with parameter" + echo " -e enable_data_plane_acl=true" + echo " -e enable_data_plane_acl=false" + echo " by default, data acl is enabled" echo echo "You should define your topology in testbed CSV file" echo @@ -195,33 
+199,54 @@ function disconnect_vms function generate_minigraph { - echo "Generating minigraph '$1'" + topology=$1 + inventory=$2 + passfile=$3 + shift + shift + shift - read_file $1 + echo "Generating minigraph '$topology'" - ansible-playbook -i "$2" config_sonic_basedon_testbed.yml --vault-password-file="$3" -l "$dut" -e testbed_name="$1" -e testbed_file=$tbfile -v + read_file $topology + + ansible-playbook -i "$inventory" config_sonic_basedon_testbed.yml --vault-password-file="$passfile" -l "$dut" -e testbed_name="$topology" -e testbed_file=$tbfile -e local_minigraph=true $@ echo Done } function deploy_minigraph { - echo "Deploying minigraph '$1'" + topology=$1 + inventory=$2 + passfile=$3 + shift + shift + shift - read_file $1 + echo "Deploying minigraph '$topology'" + + read_file $topology - ansible-playbook -i "$2" config_sonic_basedon_testbed.yml --vault-password-file="$3" -l "$dut" -e testbed_name="$1" -e testbed_file=$tbfile -e deploy=true -e save=true + ansible-playbook -i "$inventory" config_sonic_basedon_testbed.yml --vault-password-file="$passfile" -l "$dut" -e testbed_name="$topology" -e testbed_file=$tbfile -e deploy=true -e save=true $@ echo Done } function test_minigraph { - echo "Test minigraph generation '$1'" + topology=$1 + inventory=$2 + passfile=$3 + shift + shift + shift - read_file $1 + echo "Test minigraph generation '$topology'" + + read_file $topology - ansible-playbook -i "$2" --diff --connection=local --check config_sonic_basedon_testbed.yml --vault-password-file="$3" -l "$dut" -e testbed_name="$1" -e testbed_file=$tbfile -e local_minigraph=true + ansible-playbook -i "$inventory" --diff --connection=local --check config_sonic_basedon_testbed.yml --vault-password-file="$passfile" -l "$dut" -e testbed_name="$topology" -e testbed_file=$tbfile -e local_minigraph=true $@ echo Done } From f1462af3987cc1a233069eb6eb72021d59cabea1 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Mon, 1 Jul 
2019 11:41:34 +0300 Subject: [PATCH 067/218] [linkstate] fix linkstate scripts (#965) Fix linkstate ptf_proxy script which parsed sonic_str_links, etc files which were removed. Now use lab_connection_graph.xml to get fanout ports to dut to ptf to vms mappings. Fix mlnx/fanout_listner.py script for split ports and correct names. Signed-off-by: Stepan Blyschak --- .../linkstate/scripts/mlnx/fanout_listener.py | 22 +++-- ansible/linkstate/scripts/ptf_proxy.py | 87 ++++++++----------- ansible/linkstate/up.yml | 5 +- 3 files changed, 52 insertions(+), 62 deletions(-) diff --git a/ansible/linkstate/scripts/mlnx/fanout_listener.py b/ansible/linkstate/scripts/mlnx/fanout_listener.py index d4e31c358fd..164fd57e83a 100644 --- a/ansible/linkstate/scripts/mlnx/fanout_listener.py +++ b/ansible/linkstate/scripts/mlnx/fanout_listener.py @@ -133,14 +133,26 @@ def getIntfName(self, sxLogPort): log("Failed to get SX ports information, rc %d." % rc) exit(rc) + name = None port_cnt = uint32_t_p_value(port_cnt_p) - for i in range(0, port_cnt): port_attributes = sx_port_attributes_t_arr_getitem(port_attributes_list, i) - if port_attributes.log_port == sxLogPort: - return 'Ethernet{0}'.format(port_attributes.port_mapping.module_port + 1) - - return None + if port_attributes.log_port != sxLogPort: + continue + name = 'ethernet 1/{0}'.format(port_attributes.port_mapping.module_port + 1) + lanes = port_attributes.port_mapping.lane_bmap + width = port_attributes.port_mapping.width + if width == 2: + name = '{}/{}'.format(name, 1 if lanes % 2 else 2) + elif width == 1: + idx = 1 + while lanes: + lanes <<= 2 + idx += 1 + name = '{}/{}'.format(name, idx) + break + + return name def sendLinkChangeToPtfHost(self, intf, linkStatus): conn = PtfHostConn() diff --git a/ansible/linkstate/scripts/ptf_proxy.py b/ansible/linkstate/scripts/ptf_proxy.py index ba285d482dd..16ba1d392be 100644 --- a/ansible/linkstate/scripts/ptf_proxy.py +++ b/ansible/linkstate/scripts/ptf_proxy.py @@ -3,6 +3,7 @@ import 
socket import argparse import yaml +import xml.etree.ElementTree as ET import datetime import os.path from pprint import pprint @@ -61,64 +62,45 @@ def write(self, data): fp.close() -def parse_links(dut): - candidates = ['sonic_str_links.csv', 'sonic_lab_links.csv'] - # find what files exists before opening - target = None - for filename in candidates: - if os.path.exists(filename): - target = filename - break - with open(target) as fp: - all = fp.read() - rows = all.split("\n")[1:] +def parse_lab_connection_graph(lab_connection_file, dut): + tree = ET.parse(lab_connection_file) + root = tree.getroot() devices = [] dut_ports = [] mapping = {} + ip_names = {} - for r in rows: - if r == '': - continue - if dut not in r: + for link in root.findall('./PhysicalNetworkGraphDeclaration/DeviceInterfaceLinks/DeviceInterfaceLink'): + if link.attrib['StartDevice'] != dut: continue - values = r.split(',') - target_device = values[0] - target_port = values[1] - fanout_device = values[2] - fanout_port = values[3] - if target_device == dut: - devices.append(fanout_device) - mapping[(fanout_device, fanout_port)] = target_port - dut_ports.append(target_port) + + target_device = link.attrib['StartDevice'] + fanout_device = link.attrib['EndDevice'] + target_port = link.attrib['StartPort'] + fanout_port = link.attrib['EndPort'] + + devices.append(fanout_device) + dut_ports.append(target_port) + mapping[(fanout_device, fanout_port)] = target_port dut_ports = sorted(dut_ports, cmp=lambda x,y: cmp(int(x.replace('Ethernet', '')), int(y.replace('Ethernet', '')))) - return devices, dut_ports, mapping - -def parse_devices(device_names): - ip_name = {} - candidates = ['sonic_str_devices.csv', 'sonic_lab_devices.csv'] - # find what files exists before opening - target = None - for filename in candidates: - if os.path.exists(filename): - target = filename - break - with open(target) as fp: - all = fp.read() - rows = all.split("\n") - for r in rows: - if r == '': - continue - values = 
r.split(',') - name = values[0] - if name not in device_names: + for l3info in root.findall('./DataPlaneGraph/DevicesL3Info'): + if l3info.attrib['Hostname'] not in devices: continue - ip_prefix = values[1] - ip_name[name] = ip_prefix.split('/')[0] - return ip_name + mgmtinfo = l3info.findall('ManagementIPInterface') + if not mgmtinfo: + raise Exception("No management information about fanout in lab_connection_graph.xml") + + pfx = mgmtinfo[0].attrib['Prefix'] + ip_name, mask = pfx.split('/') + + ip_names[l3info.attrib['Hostname']] = ip_name + + return devices, dut_ports, mapping, ip_names + def parse_veos(vms): mapping = {} @@ -131,11 +113,11 @@ def parse_veos(vms): continue if not r.startswith('VM'): continue - name, ansible_host = r.split(' ') + name, ansible_host = r.split() if name not in vms: continue address = ansible_host.split('=')[1] - mapping[name] = address + mapping[name] = address return mapping @@ -144,7 +126,7 @@ def generate_vm_mappings(vms, base_vm, dut_ports, vm_2_ip): required_ports = {} for vm_offset, ports in vms.items(): vm = 'VM%04d' % (base_vm_id + vm_offset) - vm_ip = vm_2_ip[vm] + vm_ip = vm_2_ip[vm] p = {dut_ports[port]: (vm_ip, 'Ethernet%d' % (offset + 1)) for offset, port in enumerate(ports)} required_ports.update(p) @@ -160,13 +142,12 @@ def generate_vm_port_mapping(vm_base): vm_list = ["VM%04d" % (base + p) for p in sorted(vm_ports.keys())] return vm_ports, vm_list - + def merge(fanout_mappings, fanout_name_2_ip, vm_mappings): return {(fanout_name_2_ip[fanout_name], fanout_port) : vm_mappings[dut_port] for (fanout_name, fanout_port), dut_port in fanout_mappings.iteritems() if dut_port in vm_mappings} def generate_x_table(base_vm, dut): - devices, dut_ports, mapping = parse_links(dut) - fanout_name_2_ip = parse_devices(devices) + devices, dut_ports, mapping, fanout_name_2_ip = parse_lab_connection_graph('lab_connection_graph.xml', dut) vm_ports, vm_list = generate_vm_port_mapping(base_vm) vm_2_ip = parse_veos(vm_list) vm_mappings = 
generate_vm_mappings(vm_ports, base_vm, dut_ports, vm_2_ip) diff --git a/ansible/linkstate/up.yml b/ansible/linkstate/up.yml index 8312dbee78f..5b2679f3917 100644 --- a/ansible/linkstate/up.yml +++ b/ansible/linkstate/up.yml @@ -40,10 +40,7 @@ src: "{{ item }}" dest: /root with_items: - - ../files/sonic_str_links.csv - - ../files/sonic_str_devices.csv - - ../files/sonic_lab_links.csv - - ../files/sonic_lab_devices.csv + - ../files/lab_connection_graph.xml - ../veos - scripts/ptf_proxy.py - ../vars/topo_{{ topo }}.yml From 041fbd9118791e627135f43975fcb6d70746fc32 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Sun, 21 Jul 2019 18:45:40 +0800 Subject: [PATCH 068/218] [bgp-gr-helper] Add script for testing the BGP graceful restart helper function (#996) * [bgp-gr-helper] Add bgp-gr-helper test case Add script for testing the BGP graceful restart helper function. Signed-off-by: Xin Wang * [bgp-gr-helper] Add supported topo t1-64-lag * [bgp-gr-helper] Improve the wording * Add checking IPv6 route * [bgp-gr-helper] Enable graceful restart for t1 topo * [bgp-gr-helper] Improve script structure * Add more comments * Organize the code to make the two test cases more obvious * Remove the uncessary configuration change of graceful-restart stalepath-time Signed-off-by: Xin Wang --- ansible/group_vars/eos/eos.yml | 2 +- ansible/roles/eos/templates/t1-64-lag-tor.j2 | 3 + ansible/roles/eos/templates/t1-lag-tor.j2 | 3 + ansible/roles/eos/templates/t1-tor.j2 | 3 + ansible/roles/test/tasks/bgp_gr_helper.yml | 146 ++++++++++++++++++ .../test/tasks/bgp_gr_helper/get_vm_info.yml | 55 +++++++ .../bgp_gr_helper/routes_update_expect.txt | 1 + .../bgp_gr_helper/routes_update_match.txt | 3 + ansible/roles/test/vars/testcases.yml | 3 + 9 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/test/tasks/bgp_gr_helper.yml create mode 100644 ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml create mode 100644 
ansible/roles/test/tasks/bgp_gr_helper/routes_update_expect.txt create mode 100644 ansible/roles/test/tasks/bgp_gr_helper/routes_update_match.txt diff --git a/ansible/group_vars/eos/eos.yml b/ansible/group_vars/eos/eos.yml index 220e0a77901..a18236f8af7 100644 --- a/ansible/group_vars/eos/eos.yml +++ b/ansible/group_vars/eos/eos.yml @@ -1,4 +1,4 @@ # snmp variables snmp_rocommunity: strcommunity snmp_location: str - +bgp_gr_timer: 700 diff --git a/ansible/roles/eos/templates/t1-64-lag-tor.j2 b/ansible/roles/eos/templates/t1-64-lag-tor.j2 index 6c871fc7070..eb720966201 100644 --- a/ansible/roles/eos/templates/t1-64-lag-tor.j2 +++ b/ansible/roles/eos/templates/t1-64-lag-tor.j2 @@ -100,6 +100,9 @@ interface {{ bp_ifname }} router bgp {{ host['bgp']['asn'] }} router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} ! + graceful-restart restart-time {{ bgp_gr_timer }} + graceful-restart + ! {% for asn, remote_ips in host['bgp']['peers'].items() %} {% for remote_ip in remote_ips %} neighbor {{ remote_ip }} remote-as {{ asn }} diff --git a/ansible/roles/eos/templates/t1-lag-tor.j2 b/ansible/roles/eos/templates/t1-lag-tor.j2 index 64c2722c10f..dcdd7753459 100644 --- a/ansible/roles/eos/templates/t1-lag-tor.j2 +++ b/ansible/roles/eos/templates/t1-lag-tor.j2 @@ -92,6 +92,9 @@ interface {{ bp_ifname }} router bgp {{ host['bgp']['asn'] }} router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} ! + graceful-restart restart-time {{ bgp_gr_timer }} + graceful-restart + ! 
{% for asn, remote_ips in host['bgp']['peers'].items() %} {% for remote_ip in remote_ips %} neighbor {{ remote_ip }} remote-as {{ asn }} diff --git a/ansible/roles/eos/templates/t1-tor.j2 b/ansible/roles/eos/templates/t1-tor.j2 index 98436007cef..2a515847655 100644 --- a/ansible/roles/eos/templates/t1-tor.j2 +++ b/ansible/roles/eos/templates/t1-tor.j2 @@ -92,6 +92,9 @@ interface {{ bp_ifname }} router bgp {{ host['bgp']['asn'] }} router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} ! + graceful-restart restart-time {{ bgp_gr_timer }} + graceful-restart + ! {% for asn, remote_ips in host['bgp']['peers'].items() %} {% for remote_ip in remote_ips %} neighbor {{ remote_ip }} remote-as {{ asn }} diff --git a/ansible/roles/test/tasks/bgp_gr_helper.yml b/ansible/roles/test/tasks/bgp_gr_helper.yml new file mode 100644 index 00000000000..0a03c0f8a90 --- /dev/null +++ b/ansible/roles/test/tasks/bgp_gr_helper.yml @@ -0,0 +1,146 @@ +#----------------------------------------- +# Run BGP GR helper mode test and Perform log analysis. +#----------------------------------------- + +- fail: msg="testbed_type is not defined." + when: testbed_type is not defined + +- fail: msg="testbed_type {{testbed_type}} is unsupported." + when: testbed_type not in ['t1', 't1-lag', 't1-64-lag'] + +- name: Get VM info. + include: "roles/test/tasks/bgp_gr_helper/get_vm_info.yml" + +- name: Gather facts from bgp container. + bgp_facts: + +- name: Get VM GR timer. + set_fact: + bgp_gr_timer: "{{ bgp_neighbors[peer_ipv4]['capabilities']['peer restart timer'] }}" + +- name: Set default value for GR simulation time in seconds. 
+ set_fact: + bgp_gr_simulation_timer: 100 + +- set_fact: + testname: "bgp_gr_helper" + run_dir: /tmp + out_dir: /tmp/ansible-loganalyzer-results + tests_location: "{{ 'roles/test/tasks' }}" + +# Test case 1: Verify that routes are preserved when peer performed graceful restart +- block: + - set_fact: + testname_unique: "{{ testname }}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" + + - set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + + - include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + vars: + test_match_file: routes_update_match.txt + + - name: Set log level to INFO bo be able to catch route update messages from orchagent + command: "swssloglevel -l INFO -c orchagent" + + # When RIBD up and send bgp open message it will set F bit to 1. Which means that during restart + # all routes were preserved in FIB. When DUT receives open message with F bit set to 1 it also + # should preserve all routes (no route update should happens). + - name: Force stop RIBD to simulate GR. + shell: "killall -9 ribd; sleep 0.5; ifconfig et1 down" + delegate_to: "{{ vm_ip }}" + + - name: Simulate GR. + pause: + seconds: "{{ bgp_gr_simulation_timer if (bgp_gr_timer|int - 30) > bgp_gr_simulation_timer else (bgp_gr_timer|int - 30) }}" + + - name: Up interface to allow RIBD to send open message. End of GR. + command: ifconfig et1 up + delegate_to: "{{ vm_ip }}" + + - name: Wait for BGP session state update. + pause: + seconds: 10 + + - name: Gather facts from bgp container. + bgp_facts: + + - name: Verify IPv4 bgp session is established + assert: { that: "'{{ bgp_neighbors[peer_ipv4]['state'] }}' == 'established'" } + + - name: Verify IPv6 bgp session is established + assert: { that: "'{{ bgp_neighbors[peer_ipv6]['state'] }}' == 'established'" } + + - name: Verify that IPv4 unicast routes were preserved during GR. 
+ assert: { that: "'{{ bgp_neighbors[peer_ipv4]['capabilities']['peer af ipv4 unicast'] }}' == 'preserved'" } + + - name: Verify that IPv6 unicast routes were preserved during GR. + assert: { that: "'{{ bgp_neighbors[peer_ipv6]['capabilities']['peer af ipv6 unicast'] }}' == 'preserved'" } + + # Analyze syslog, no log message related with routes update should be observed + - include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + vars: + test_match_file: routes_update_match.txt + + - include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + always: + - name: Set log level back to NOTICE + command: "swssloglevel -l NOTICE -c orchagent" + +# Test case 2: Verify that routes are not preserved when peer performed normal reboot +- block: + - set_fact: + testname_unique: "{{ testname }}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" + + - set_fact: + test_out_dir: "{{ out_dir }}/{{ testname_unique }}" + + - include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + vars: + test_expect_file: routes_update_expect.txt + + - name: Set log level to INFO bo be able to catch route update messages from orchagent + command: "swssloglevel -l INFO -c orchagent" + + # Reboot the VM, this is not a graceful restart + - name: Reboot the VM + shell: killall -9 ribd ; reboot + delegate_to: "{{ vm_ip }}" + + - name: Wait for the VM to go down + pause: + seconds: 90 + + - name: Wait for the VM to come back + local_action: wait_for port=22 host="{{ vm_ip }}" delay=20 timeout="{{ bgp_gr_timer|int - 90 }}" state=started + + - name: Wait for BGP session state update. + pause: + seconds: 30 + + - name: Gather facts from bgp container. 
+ bgp_facts: + + - name: Verify IPv4 bgp session is established + assert: { that: "'{{ bgp_neighbors[peer_ipv4]['state'] }}' == 'established'" } + + - name: Verify IPv6 bgp session is established + assert: { that: "'{{ bgp_neighbors[peer_ipv6]['state'] }}' == 'established'" } + + - name: Verify that IPv4 unicast routes were not preserved during GR. FIB should be updated. + assert: { that: "'{{ bgp_neighbors[peer_ipv4]['capabilities']['peer af ipv4 unicast'] }}' == 'not preserved'" } + + - name: Verify that IPv6 unicast routes were not preserved during GR. FIB should be updated. + assert: { that: "'{{ bgp_neighbors[peer_ipv6]['capabilities']['peer af ipv6 unicast'] }}' == 'not preserved'" } + + # Analyze syslog, log messages related with routes update are expected + - include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + vars: + test_expect_file: routes_update_expect.txt + + - include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + always: + - name: Set log level back to NOTICE + command: "swssloglevel -l NOTICE -c orchagent" diff --git a/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml b/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml new file mode 100644 index 00000000000..2f89162d18c --- /dev/null +++ b/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml @@ -0,0 +1,55 @@ +- name: Gathering lab graph facts about the device + conn_graph_facts: host={{ ansible_host }} + connection: local + tags: always + +- name: Init variables. + set_fact: + vm_name: "" + vm_intf: "" + vm_ip: "" + peer_ipv4: "" + peer_ipv6: "" + +- name: Get neighbor VM info. + set_fact: + vm_name: "{{ item.value.name }}" + vm_intf: "{{ item.key }}" + with_dict: "{{ minigraph_neighbors }}" + when: "testbed_type in ['t1', 't1-lag', 't1-64-lag'] and 'T0' in item.value.name and not vm_name" + +- name: Get neighbor IPv4 address. 
+ set_fact: + peer_ipv4: "{{ item.addr }}" + with_items: "{{ minigraph_bgp }}" + when: "item.name == vm_name and item.addr|ipv4" + +- name: Get neighbor IPv6 address. + set_fact: + peer_ipv6: "{{ item.addr|lower }}" + with_items: "{{ minigraph_bgp }}" + when: "item.name == vm_name and item.addr|ipv6" + +- name: Gather information from LLDP + lldp: + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i lldp python + +- name: Get VM IP address. + set_fact: + vm_ip: "{{ lldp[vm_intf]['chassis']['mgmt-ip'] }}" + +- name: Add host + add_host: + name: "{{ vm_ip }}" + groups: "lldp_neighbors,eos" + +- debug: + var: vm_name +- debug: + var: vm_intf +- debug: + var: vm_ipv4 +- debug: + var: vm_ipv6 diff --git a/ansible/roles/test/tasks/bgp_gr_helper/routes_update_expect.txt b/ansible/roles/test/tasks/bgp_gr_helper/routes_update_expect.txt new file mode 100644 index 00000000000..b62ec11268a --- /dev/null +++ b/ansible/roles/test/tasks/bgp_gr_helper/routes_update_expect.txt @@ -0,0 +1 @@ +r, ".*Create route.*" \ No newline at end of file diff --git a/ansible/roles/test/tasks/bgp_gr_helper/routes_update_match.txt b/ansible/roles/test/tasks/bgp_gr_helper/routes_update_match.txt new file mode 100644 index 00000000000..534337d93d9 --- /dev/null +++ b/ansible/roles/test/tasks/bgp_gr_helper/routes_update_match.txt @@ -0,0 +1,3 @@ +r, ".*Remove route.*" +r, ".*Create route.*" +r, ".*Set route.*" \ No newline at end of file diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index bc1c1af85b1..e0f9373636c 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -16,6 +16,9 @@ testcases: filename: bgp_fact.yml topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + bgp_gr_helper: + filename: bgp_gr_helper.yml + topologies: [t1, t1-lag, t1-64-lag] bgp_multipath_relax: filename: bgp_multipath_relax.yml From 6429d3674bf867fb1078723fea07e2d5ef536697 Mon Sep 
17 00:00:00 2001 From: Neetha John Date: Tue, 23 Jul 2019 16:58:03 -0700 Subject: [PATCH 069/218] Add time diff check between BGP v4 and v6 peers (#1014) Signed-off-by: Neetha John --- ansible/roles/test/files/ptftests/arista.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index e0bef368a54..fcd4b2ecf8f 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -248,6 +248,16 @@ def parse_logs(self, data): assert(events[-1][1] == 'Established') + # verify BGP establishment time between v4 and v6 peer is not more than 20s + if self.reboot_type == 'warm-reboot': + estab_time = 0 + for ip in result_bgp: + if estab_time > 0: + diff = abs(result_bgp[ip][-1][0] - estab_time) + assert(diff <= 20) + break + estab_time = result_bgp[ip][-1][0] + # first state is down, last state is up for events in result_if.values(): assert(events[0][1] == 'down') From ce5aaef0548c66ebf33375ea3925e464509041e0 Mon Sep 17 00:00:00 2001 From: Guohan Lu Date: Wed, 30 Jan 2019 04:10:15 +0000 Subject: [PATCH 070/218] rename sonic password variable names Signed-off-by: Guohan Lu --- ansible/roles/vm_set/tasks/start_sonic_vm.yml | 4 ++-- ansible/roles/vm_set/vars/main.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/vm_set/tasks/start_sonic_vm.yml b/ansible/roles/vm_set/tasks/start_sonic_vm.yml index b176b563b58..35ad1703231 100644 --- a/ansible/roles/vm_set/tasks/start_sonic_vm.yml +++ b/ansible/roles/vm_set/tasks/start_sonic_vm.yml @@ -39,11 +39,11 @@ - name: Wait until vm {{ dut_name }} is loaded sonic_kickstart: telnet_port={{ serial_port }} login={{ sonic_login }} - passwords={{ sonic_passwords }} + passwords={{ sonic_default_passwords }} hostname={{ dut_name }} mgmt_ip="{{ mgmt_ip_address }}/{{ mgmt_prefixlen }}" mgmt_gw={{ vm_mgmt_gw | default(mgmt_gw) }} - new_password={{ sonic_new_password }} + 
new_password={{ sonic_password }} register: kickstart_output - name: Fail if kickstart gives error for {{ dut_name }} diff --git a/ansible/roles/vm_set/vars/main.yml b/ansible/roles/vm_set/vars/main.yml index 727f59e5a38..6111407195a 100644 --- a/ansible/roles/vm_set/vars/main.yml +++ b/ansible/roles/vm_set/vars/main.yml @@ -5,10 +5,10 @@ eos_password: 123456 eos_root_password: 123456 sonic_login: "admin" -sonic_passwords: +sonic_default_passwords: - "YourPaSsWoRd" - "password" -sonic_new_password: "password" +sonic_password: "password" tor_memory: 1572864 spine_memory: 2097152 From ab9361d3426aa4f316d8ea3db0bfab4a655aa09e Mon Sep 17 00:00:00 2001 From: Guohan Lu Date: Wed, 30 Jan 2019 04:16:01 +0000 Subject: [PATCH 071/218] move eos/sonic creds into group_vars/all to share them among different roles Signed-off-by: Guohan Lu --- ansible/group_vars/all/creds.yml | 11 +++++++++++ ansible/roles/vm_set/vars/main.yml | 12 ------------ 2 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 ansible/group_vars/all/creds.yml diff --git a/ansible/group_vars/all/creds.yml b/ansible/group_vars/all/creds.yml new file mode 100644 index 00000000000..2b4d1c8ea09 --- /dev/null +++ b/ansible/group_vars/all/creds.yml @@ -0,0 +1,11 @@ +eos_default_login: "admin" +eos_default_password: "" +eos_login: admin +eos_password: 123456 +eos_root_password: 123456 + +sonic_login: "admin" +sonic_default_passwords: + - "YourPaSsWoRd" + - "password" +sonic_password: "password" diff --git a/ansible/roles/vm_set/vars/main.yml b/ansible/roles/vm_set/vars/main.yml index 6111407195a..70e2ee8bcd3 100644 --- a/ansible/roles/vm_set/vars/main.yml +++ b/ansible/roles/vm_set/vars/main.yml @@ -1,15 +1,3 @@ -eos_default_login: "admin" -eos_default_password: "" -eos_login: admin -eos_password: 123456 -eos_root_password: 123456 - -sonic_login: "admin" -sonic_default_passwords: - - "YourPaSsWoRd" - - "password" -sonic_password: "password" - tor_memory: 1572864 spine_memory: 2097152 From 
998b9599621066e0b5dd2cb27231b4fa1f6c133e Mon Sep 17 00:00:00 2001 From: Neetha John Date: Wed, 24 Jul 2019 10:54:37 -0700 Subject: [PATCH 072/218] [warm-reboot] Add Preboot n BGP member down and n Lag down tests (#1004) * Add Preboot n BGP member down and n Lag down tests Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 32 ++- ansible/roles/test/files/ptftests/sad_path.py | 258 ++++++++++-------- ansible/roles/test/tasks/advanced-reboot.yml | 5 + .../advanced_reboot/validate_preboot_list.yml | 13 + .../test/tasks/warm-reboot-multi-sad.yml | 11 + ansible/roles/test/vars/testcases.yml | 7 + 6 files changed, 206 insertions(+), 120 deletions(-) create mode 100644 ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml create mode 100644 ansible/roles/test/tasks/warm-reboot-multi-sad.yml diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 89681ed658e..35ead6d1309 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -143,7 +143,7 @@ def __init__(self): self.check_param('warm_up_timeout_secs', 300, required=False) self.check_param('dut_stabilize_secs', 30, required=False) self.check_param('preboot_files', None, required = False) - self.check_param('preboot_oper', None, required = False) + self.check_param('preboot_oper', None, required = False) # preboot sad path to inject before warm-reboot self.check_param('allow_vlan_flooding', False, required = False) self.check_param('sniff_time_incr', 60, required = False) if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None': @@ -344,6 +344,12 @@ def build_peer_mapping(self): self.get_neigh_port_info() self.get_portchannel_info() + def populate_fail_info(self, fails): + for key in fails: + if key not in self.fails: + self.fails[key] = set() + self.fails[key] |= fails[key] + def setUp(self): self.fails['dut'] = set() 
self.port_indices = self.read_port_indices() @@ -376,14 +382,12 @@ def setUp(self): if self.preboot_oper is not None: self.log("Preboot Operations:") self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh) - (self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails_dut, fails_vm) = self.pre_handle.setup() - self.fails['dut'] |= fails_dut - self.fails[self.neigh_vm] = fails_vm + (self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails) = self.pre_handle.setup() + self.populate_fail_info(fails) for log in log_info: self.log(log) - log_info, fails_dut, fails_vm = self.pre_handle.verify() - self.fails['dut'] |= fails_dut - self.fails[self.neigh_vm] |= fails_vm + log_info, fails = self.pre_handle.verify() + self.populate_fail_info(fails) for log in log_info: self.log(log) self.log(" ") @@ -418,7 +422,14 @@ def setUp(self): self.generate_arp_ping_packet() if self.reboot_type == 'warm-reboot': - self.log("Preboot Oper: %s" % self.preboot_oper) + # get the number of members down for sad path + if self.preboot_oper: + if ':' in self.preboot_oper: + oper_type, cnt = self.preboot_oper.split(':') + else: + oper_type, cnt = self.preboot_oper, 1 + self.log("Preboot Oper: %s Number down: %s" % (oper_type, cnt)) + # Pre-generate list of packets to be sent in send_in_background method. 
generate_start = datetime.datetime.now() self.generate_bidirectional() @@ -736,9 +747,8 @@ def wait_for_ssh_threads(): if self.reboot_type == 'warm-reboot' and self.preboot_oper is not None: if self.pre_handle is not None: self.log("Postboot checks:") - log_info, fails_dut, fails_vm = self.pre_handle.verify(pre_check=False) - self.fails[self.neigh_vm] |= fails_vm - self.fails['dut'] |= fails_dut + log_info, fails = self.pre_handle.verify(pre_check=False) + self.populate_fail_info(fails) for log in log_info: self.log(log) self.log(" ") diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index fec1f64fd41..958e4be2e58 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -36,16 +36,17 @@ def revert(self): class SadPath(object): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args): - self.oper_type = oper_type + (self.oper_type, self.cnt) = oper_type.split(':') if ':' in oper_type else (oper_type, 1) + self.cnt = int(self.cnt) self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map self.test_args = test_args - self.neigh_vm = None - self.neigh_name = None - self.vm_handle = None - self.neigh_bgp = None - self.dut_bgp = None + self.neigh_vms = [] + self.neigh_names = dict() + self.vm_handles = dict() + self.neigh_bgps = dict() + self.dut_bgps = dict() self.log = [] self.fails = dict() self.fails['dut'] = set() @@ -62,78 +63,91 @@ def cmd(self, cmds): def select_vm(self): self.vm_list.sort() - # use the day of the month to select a VM from the list for the sad pass operation - vm_index = datetime.datetime.now().day % len(self.vm_list) - self.neigh_vm = self.vm_list.pop(vm_index) + vm_len = len(self.vm_list) + # use the day of the month to select start VM from the list for the sad pass operation + # neigh_vms list will contain cnt number of VMs starting from the start VM. 
vm_list will have the rest of the VMs + vm_index = datetime.datetime.now().day % vm_len if vm_len > 0 else 0 + exceed_len = vm_index + self.cnt - vm_len + if exceed_len <= 0: + self.neigh_vms.extend(self.vm_list[vm_index:vm_index+self.cnt]) + self.vm_list = self.vm_list[0:vm_index] + self.vm_list[vm_index+self.cnt:] + else: + self.neigh_vms.extend(self.vm_list[vm_index:]) + self.neigh_vms.extend(self.vm_list[0:exceed_len]) + self.vm_list = self.vm_list[exceed_len:vm_len - self.cnt] def get_neigh_name(self): - for key in self.vm_dut_map.keys(): - if self.vm_dut_map[key]['mgmt_addr'] == self.neigh_vm: - self.neigh_name = key - break + for key in self.vm_dut_map: + for neigh_vm in self.neigh_vms: + if self.vm_dut_map[key]['mgmt_addr'] == neigh_vm: + self.neigh_names[neigh_vm] = key # VM address to name mapping + break def down_neigh_port(self): - # extract ptf ports for the selected VM and mark them down - for port in self.vm_dut_map[self.neigh_name]['ptf_ports']: - self.portchannel_ports.remove(port) + # extract ptf ports for the selected VMs and mark them down + for neigh_name in self.neigh_names.values(): + for port in self.vm_dut_map[neigh_name]['ptf_ports']: + self.portchannel_ports.remove(port) def vm_connect(self): - self.vm_handle = Arista(self.neigh_vm, None, self.test_args) - self.vm_handle.connect() + for neigh_vm in self.neigh_vms: + self.vm_handles[neigh_vm] = Arista(neigh_vm, None, self.test_args) + self.vm_handles[neigh_vm].connect() def __del__(self): self.vm_disconnect() def vm_disconnect(self): - self.vm_handle.disconnect() + for vm in self.vm_handles: + self.vm_handles[vm].disconnect() def setup(self): self.select_vm() self.get_neigh_name() self.down_neigh_port() self.vm_connect() - self.neigh_bgp, self.dut_bgp = self.vm_handle.get_bgp_info() - self.fails[self.neigh_vm] = set() - self.log.append('Neighbor AS: %s' % self.neigh_bgp['asn']) - self.log.append('BGP v4 neighbor: %s' % self.neigh_bgp['v4']) - self.log.append('BGP v6 neighbor: %s' % 
self.neigh_bgp['v6']) - self.log.append('DUT BGP v4: %s' % self.dut_bgp['v4']) - self.log.append('DUT BGP v6: %s' % self.dut_bgp['v6']) + for vm in self.vm_handles: + self.neigh_bgps[vm], self.dut_bgps[vm] = self.vm_handles[vm].get_bgp_info() + self.fails[vm] = set() + self.log.append('Neighbor AS: %s' % self.neigh_bgps[vm]['asn']) + self.log.append('BGP v4 neighbor: %s' % self.neigh_bgps[vm]['v4']) + self.log.append('BGP v6 neighbor: %s' % self.neigh_bgps[vm]['v6']) + self.log.append('DUT BGP v4: %s' % self.dut_bgps[vm]['v4']) + self.log.append('DUT BGP v6: %s' % self.dut_bgps[vm]['v6']) def retreive_test_info(self): - return self.vm_list, self.portchannel_ports, self.neigh_vm + return self.vm_list, self.portchannel_ports, self.neigh_vms def retreive_logs(self): - return self.log, self.fails['dut'], self.fails[self.neigh_vm] + return self.log, self.fails class SadOper(SadPath): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh): super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) self.dut_ssh = dut_ssh - self.dut_needed = None - self.lag_members_down = None + self.dut_needed = dict() + self.lag_members_down = dict() self.neigh_lag_state = None + self.po_neigh_map = dict() self.msg_prefix = ['Postboot', 'Preboot'] def populate_bgp_state(self): + [self.dut_needed.setdefault(vm, self.dut_bgps[vm]) for vm in self.neigh_vms] if self.oper_type == 'neigh_bgp_down': - self.neigh_bgp['changed_state'] = 'down' - self.dut_bgp['changed_state'] = 'Active' - self.dut_needed = None + self.neigh_bgps['changed_state'] = 'down' + self.dut_bgps['changed_state'] = 'Active' + [self.dut_needed.update({vm:None}) for vm in self.neigh_vms] elif self.oper_type == 'dut_bgp_down': - self.neigh_bgp['changed_state'] = 'Active' - self.dut_bgp['changed_state'] = 'Idle' - self.dut_needed = self.dut_bgp + self.neigh_bgps['changed_state'] = 'Active' + self.dut_bgps['changed_state'] = 'Idle' elif self.oper_type == 
'neigh_lag_down': # on the DUT side, bgp states are different pre and post boot. hence passing multiple values - self.neigh_bgp['changed_state'] = 'Idle' - self.dut_bgp['changed_state'] = 'Connect,Active,Idle' - self.dut_needed = self.dut_bgp + self.neigh_bgps['changed_state'] = 'Idle' + self.dut_bgps['changed_state'] = 'Connect,Active,Idle' elif self.oper_type == 'dut_lag_down': - self.neigh_bgp['changed_state'] = 'Idle' - self.dut_bgp['changed_state'] = 'Active,Connect,Idle' - self.dut_needed = self.dut_bgp + self.neigh_bgps['changed_state'] = 'Idle' + self.dut_bgps['changed_state'] = 'Active,Connect,Idle' def sad_setup(self, is_up=True): self.log = [] @@ -145,18 +159,21 @@ def sad_setup(self, is_up=True): self.populate_lag_state() if 'bgp' in self.oper_type: - self.log.append('BGP state change will be for %s' % self.neigh_vm) + self.log.append('BGP state change will be for %s' % ", ".join(self.neigh_vms)) if self.oper_type == 'neigh_bgp_down': - self.log.append('Changing state of AS %s to shut' % self.neigh_bgp['asn']) - self.vm_handle.change_bgp_neigh_state(self.neigh_bgp['asn'], is_up=is_up) + for vm in self.neigh_vms: + self.log.append('Changing state of AS %s to shut' % self.neigh_bgps[vm]['asn']) + self.vm_handles[vm].change_bgp_neigh_state(self.neigh_bgps[vm]['asn'], is_up=is_up) elif self.oper_type == 'dut_bgp_down': self.change_bgp_dut_state(is_up=is_up) time.sleep(30) + elif 'lag' in self.oper_type: - self.log.append('LAG state change will be for %s' % self.neigh_vm) + self.log.append('LAG state change will be for %s' % ", ".join(self.neigh_vms)) if self.oper_type == 'neigh_lag_down': - self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_name]['neigh_portchannel']) - self.vm_handle.change_neigh_lag_state(self.vm_dut_map[self.neigh_name]['neigh_portchannel'], is_up=is_up) + for vm in self.neigh_vms: + self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']) + 
self.vm_handles[vm].change_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], is_up=is_up) elif self.oper_type == 'dut_lag_down': self.change_dut_lag_state(is_up=is_up) # wait for sometime for lag members state to sync @@ -164,48 +181,55 @@ def sad_setup(self, is_up=True): def change_bgp_dut_state(self, is_up=True): state = ['shutdown', 'startup'] - for key in self.neigh_bgp.keys(): - if key not in ['v4', 'v6']: - continue - - self.log.append('Changing state of BGP peer %s from DUT side to %s' % (self.neigh_bgp[key], state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config bgp %s neighbor %s' % (state[is_up], self.neigh_bgp[key])]) - if return_code != 0: - self.fails['dut'].add('State change not successful from DUT side for peer %s' % self.neigh_bgp[key]) - self.fails['dut'].add('Return code: %d' % return_code) - self.fails['dut'].add('Stderr: %s' % stderr) + for vm in self.neigh_vms: + for key in self.neigh_bgps[vm].keys(): + if key not in ['v4', 'v6']: + continue + + self.log.append('Changing state of BGP peer %s from DUT side to %s' % (self.neigh_bgps[vm][key], state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config bgp %s neighbor %s' % (state[is_up], self.neigh_bgps[vm][key])]) + if return_code != 0: + self.fails['dut'].add('State change not successful from DUT side for peer %s' % self.neigh_bgps[vm][key]) + self.fails['dut'].add('Return code: %d' % return_code) + self.fails['dut'].add('Stderr: %s' % stderr) def verify_bgp_dut_state(self, state='Idle'): states = state.split(',') bgp_state = {} - bgp_state['v4'] = bgp_state['v6'] = False - for key in self.neigh_bgp.keys(): - if key not in ['v4', 'v6']: - continue - self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgp[key], states)) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show ip 
bgp neighbor %s' % self.neigh_bgp[key]]) - if return_code == 0: - for line in stdout.split('\n'): - if 'BGP state' in line: - curr_state = re.findall('BGP state = (\w+)', line)[0] - bgp_state[key] = (curr_state in states) - break - else: - self.fails['dut'].add('Retreiving BGP info for peer %s from DUT side failed' % self.neigh_bgp[key]) - self.fails['dut'].add('Return code: %d' % return_code) - self.fails['dut'].add('Stderr: %s' % stderr) + for vm in self.neigh_vms: + bgp_state[vm] = dict() + bgp_state[vm]['v4'] = bgp_state[vm]['v6'] = False + for key in self.neigh_bgps[vm].keys(): + if key not in ['v4', 'v6']: + continue + self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgps[vm][key], states)) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show ip bgp neighbor %s' % self.neigh_bgps[vm][key]]) + if return_code == 0: + for line in stdout.split('\n'): + if 'BGP state' in line: + curr_state = re.findall('BGP state = (\w+)', line)[0] + bgp_state[vm][key] = (curr_state in states) + break + else: + self.fails['dut'].add('Retreiving BGP info for peer %s from DUT side failed' % self.neigh_bgps[vm][key]) + self.fails['dut'].add('Return code: %d' % return_code) + self.fails['dut'].add('Stderr: %s' % stderr) return bgp_state def sad_bgp_verify(self): self.log = [] - fails_vm, bgp_state = self.vm_handle.verify_bgp_neigh_state(dut=self.dut_needed, state=self.neigh_bgp['changed_state']) - self.fails[self.neigh_vm] |= fails_vm - if bgp_state['v4'] and bgp_state['v6']: - self.log.append('BGP state down as expected for %s' % self.neigh_vm) - else: - self.fails[self.neigh_vm].add('BGP state not down for %s' % self.neigh_vm) - bgp_state = self.verify_bgp_dut_state(state=self.dut_bgp['changed_state']) - if bgp_state['v4'] and bgp_state['v6']: + for vm in self.neigh_vms: + fails_vm, bgp_state = self.vm_handles[vm].verify_bgp_neigh_state(dut=self.dut_needed[vm], state=self.neigh_bgps['changed_state']) + 
self.fails[vm] |= fails_vm + if bgp_state['v4'] and bgp_state['v6']: + self.log.append('BGP state down as expected for %s' % vm) + else: + self.fails[vm].add('BGP state not down for %s' % vm) + bgp_state = self.verify_bgp_dut_state(state=self.dut_bgps['changed_state']) + state = True + for vm in self.neigh_vms: + state &= bgp_state[vm]['v4'] and bgp_state[vm]['v6'] + if state: self.log.append('BGP state down as expected on DUT') else: self.fails['dut'].add('BGP state not down on DUT') @@ -213,28 +237,35 @@ def sad_bgp_verify(self): def populate_lag_state(self): if self.oper_type == 'neigh_lag_down': self.neigh_lag_state = 'disabled' - self.lag_members_down = self.vm_dut_map[self.neigh_name]['dut_ports'] elif self.oper_type == 'dut_lag_down': - self.lag_members_down = self.vm_dut_map[self.neigh_name]['dut_ports'] self.neigh_lag_state = 'notconnect' + for neigh_name in self.neigh_names.values(): + # build portchannel to down members mapping + po_name = self.vm_dut_map[neigh_name]['dut_portchannel'] + self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports'] + def change_dut_lag_state(self, is_up=True): state = ['shutdown', 'startup'] - dut_portchannel = self.vm_dut_map[self.neigh_name]['dut_portchannel'] - if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): return - self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)]) - if return_code != 0: - self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) - self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) - self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) - else: - self.log.append('State change successful on DUT') + for neigh_name in self.neigh_names.values(): + 
dut_portchannel = self.vm_dut_map[neigh_name]['dut_portchannel'] + if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): continue + self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)]) + if return_code != 0: + self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + else: + self.log.append('%s: State change successful on DUT for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) - def verify_dut_lag_member_state(self, lag_memb_output, pre_check=True): + def verify_dut_lag_member_state(self, match, pre_check=True): success = True - for member in self.vm_dut_map[self.neigh_name]['dut_ports']: - if self.lag_members_down is not None and member in self.lag_members_down: + po_name = match.group(1) + lag_memb_output = match.group(2) + neigh_name = self.po_neigh_map[po_name] + for member in self.vm_dut_map[neigh_name]['dut_ports']: + if po_name in self.lag_members_down and member in self.lag_members_down[po_name]: search_str = '%s(D)' % member else: search_str = '%s(S)' % member @@ -247,30 +278,39 @@ def verify_dut_lag_member_state(self, lag_memb_output, pre_check=True): return success def verify_dut_lag_state(self, pre_check=True): - pat = re.compile(".*%s\s+LACP\(A\)\(Dw\)\s+(.*)" % self.vm_dut_map[self.neigh_name]['dut_portchannel']) + # pattern match eg: '0001 PortChannel0001 LACP(A)(Up) Ethernet0(S) Ethernet4(S)'. 
extract the portchannel name and members + pat = re.compile("\s+\d+\s+(\w+\d+)\s+LACP\(A\)\(Dw\)\s+(.*)") + + # get list of down portchannels and build portchannel to neigh mapping + po_list = [] + for vm in self.neigh_vms: + po_name = self.vm_dut_map[self.neigh_names[vm]]['dut_portchannel'] + po_list.append(po_name) + self.po_neigh_map[po_name] = self.neigh_names[vm] + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces portchannel']) if return_code == 0: for line in stdout.split('\n'): - if self.vm_dut_map[self.neigh_name]['dut_portchannel'] in line: + if any(po_name in line for po_name in po_list): is_match = pat.match(line) - if is_match and self.verify_dut_lag_member_state(is_match.group(1), pre_check=pre_check): - self.log.append('Lag state is down as expected on the DUT') + if is_match and self.verify_dut_lag_member_state(is_match, pre_check=pre_check): + self.log.append('Lag state is down as expected on the DUT for %s' % is_match.group(1)) self.log.append('Pattern check: %s' % line) else: - self.fails['dut'].add('%s: Lag state is not down on the DUT' % self.msg_prefix[pre_check]) + self.fails['dut'].add('%s: Lag state is not down on the DUT for %s' % (self.msg_prefix[pre_check], is_match.group(1))) self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line)) - break else: self.fails['dut'].add('%s: Retreiving LAG info from DUT side failed' % self.msg_prefix[pre_check]) self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code)) self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[pre_check], stderr)) def sad_lag_verify(self, pre_check=True): - fails_vm, lag_state = self.vm_handle.verify_neigh_lag_state(self.vm_dut_map[self.neigh_name]['neigh_portchannel'], state=self.neigh_lag_state, pre_check=pre_check) - self.fails[self.neigh_vm] |= fails_vm - if lag_state: - self.log.append('LAG state down as expected for %s' % self.neigh_vm) - else: - 
self.fails[self.neigh_vm].add('%s: LAG state not down for %s' % (self.msg_prefix[pre_check], self.neigh_vm)) + for vm in self.neigh_vms: + fails_vm, lag_state = self.vm_handles[vm].verify_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], state=self.neigh_lag_state, pre_check=pre_check) + self.fails[vm] |= fails_vm + if lag_state: + self.log.append('LAG state down as expected for %s' % vm) + else: + self.fails[vm].add('%s: LAG state not down for %s' % (self.msg_prefix[pre_check], vm)) self.log.append('Verifying LAG state on the dut end') self.verify_dut_lag_state(pre_check=pre_check) diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index c1ff572a9bb..31df0f62e4a 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -23,6 +23,11 @@ - name: Preboot-list initialization set_fact: preboot_list={% if preboot_list is not defined %}[None]{% else %}{{ preboot_list }}{% endif %} + - name: Validate preboot list + include: roles/test/tasks/advanced_reboot/validate_preboot_list.yml + with_items: "{{ preboot_list }}" + when: item and ':' in item + - name: Preboot files initialization set_fact: preboot_files={% if preboot_files is not defined %}None{% else %}{{ preboot_files }}{% endif %} diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml new file mode 100644 index 00000000000..bf6f88f113d --- /dev/null +++ b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml @@ -0,0 +1,13 @@ +- set_fact: + item_cnt: "{{ item.split(':')[1]|int }}" + host_max_len: "{{ vm_hosts|length - 1 }}" + member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" + +- fail: msg="Bgp neigh down count is greater than or equal to number of VM hosts. 
Current val = {{ item_cnt }} Max val = {{ host_max_len }}" + when: "{{ 'bgp_down' in item and item_cnt > host_max_len }}" + +- fail: msg="Lag count is greater than or equal to number of VM hosts. Current val = {{ item_cnt }} Max val = {{ host_max_len }}" + when: "{{ 'lag_down' in item and item_cnt > host_max_len }}" + +- fail: msg="Lag member count is greater than available number of lag members. Current val = {{ item_cnt }} Available cnt = {{ member_max_cnt }}" + when: "{{ 'lag_member_down' in item and item_cnt > member_max_cnt }}" diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml new file mode 100644 index 00000000000..9555da8ca35 --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml @@ -0,0 +1,11 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +- name: Warm-reboot test + include: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3'] + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index e0f9373636c..d179e41f8bd 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -94,6 +94,13 @@ testcases: ptf_host: vm_hosts: + warm-reboot-multi-sad: + filename: warm-reboot-multi-sad.yml + topologies: [t0, t0-64, t0-64-32, t0-116] + required_vars: + ptf_host: + vm_hosts: + fib: filename: simple-fib.yml topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] From 2b2f85ec7dad7532e8fe21b508d098dbbc8bf2ec Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Tue, 30 Jul 2019 10:21:02 +0300 Subject: [PATCH 073/218] [advanced-reboot] help switch to populate fdb table before test (#1035) --- .../test/files/ptftests/advanced-reboot.py | 
33 +++++++++++++++++++ .../roles/test/tasks/ptf_runner_reboot.yml | 1 + 2 files changed, 34 insertions(+) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 35ead6d1309..b13cfaa4463 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -155,6 +155,11 @@ def __init__(self): self.log_file_name = '/tmp/%s.log' % self.test_params['reboot_type'] self.log_fp = open(self.log_file_name, 'w') + # a flag whether to populate FDB by sending traffic from simulated servers + # usually ARP responder will make switch populate its FDB table, but Mellanox on 201803 has + # no L3 ARP support, so this flag is used to W/A this issue + self.setup_fdb_before_test = self.test_params.get('setup_fdb_before_test', False) + # Default settings self.ping_dut_pkts = 10 self.arp_ping_pkts = 1 @@ -450,7 +455,30 @@ def setUp(self): return + def setup_fdb(self): + """ simulate traffic generated from servers to help populate FDB """ + + vlan_map = self.vlan_host_map + + from_servers_pkt = testutils.simple_tcp_packet( + eth_dst=self.dut_mac, + ip_dst=self.from_server_dst_addr, + ) + + for port in vlan_map: + for addr in vlan_map[port]: + mac = vlan_map[port][addr] + + from_servers_pkt[scapy.Ether].src = self.hex_to_mac(mac) + from_servers_pkt[scapy.IP].src = addr + + testutils.send(self, port, from_servers_pkt) + + # make sure orchagent processed new FDBs + time.sleep(1) + def tearDown(self): + self.log("Disabling arp_responder") self.cmd(["supervisorctl", "stop", "arp_responder"]) @@ -624,6 +652,11 @@ def runTest(self): thr.setDaemon(True) try: + if self.setup_fdb_before_test: + self.log("Run some server traffic to populate FDB table...") + self.setup_fdb() + + self.log("Starting reachability state watch thread...") self.watching = True self.light_probe = False diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml 
b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 7966f81a31e..a8fe127bc84 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -26,6 +26,7 @@ - preboot_oper='{{ item }}' - allow_vlan_flooding='{{ allow_vlan_flooding }}' - sniff_time_incr={{ sniff_time_incr }} + - setup_fdb_before_test=True always: From 57e41203e47c6c08b0f58810aa7a199abd557939 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Wed, 31 Jul 2019 17:38:53 +0300 Subject: [PATCH 074/218] [mlnx][fanout] disable mac learning on fanout (#1041) Change-Id: Ib1e7b8c35a20ecb1aae8629749e3ad10e6269add Signed-off-by: Stepan Blyschak --- ansible/roles/fanout/templates/mlnx_fanout.j2 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ansible/roles/fanout/templates/mlnx_fanout.j2 b/ansible/roles/fanout/templates/mlnx_fanout.j2 index 81f36278e5f..7116041f979 100644 --- a/ansible/roles/fanout/templates/mlnx_fanout.j2 +++ b/ansible/roles/fanout/templates/mlnx_fanout.j2 @@ -98,6 +98,12 @@ protocol openflow {% for i in range(1, eth_ports|length) %} interface ethernet {{ eth_ports[i] }} openflow mode hybrid + +# Disable mac learning to avoid issue when VM MAC address appears on DUT port during SONiC port flap or restart and also issue in FDB reload test for which rules matching 0x1234 ethertype were introduced. +# The idea is that forwarding is controlled by openflow rules. +# For normal openflow rule since there are only two ports in same vlan (one access port connected to DUT, one trunk port connected to server) +# - one is always a source port and one is always a destination port, so no flooding actually occurs. 
+interface ethernet {{ eth_ports[i] }} mac-learning disable {% endfor %} {% set of_counter = 0 -%} @@ -149,7 +155,10 @@ openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={ openflow add-flows {{ of_counter + i }} table={{ open_flow_tableid }},priority={{ dut_to_server_flow_priority }},dl_type={{ eth_typ_test }},in_port={{ of_ports[i] }},actions=output:{{ of_ports[uplink_port_id] }} {% endfor %} -openflow add-flows {{ last_flowid }} table={{ open_flow_tableid }},priority={{ low_priority }},actions=normal +{% set of_counter = of_counter + eth_ports|length-2 -%} + +# apply normal openflow rule +openflow add-flows {{ of_counter + 1 }} table={{ open_flow_tableid }},priority={{ low_priority }},actions=normal docker no shutdown From d9ec5f80bfdfc53d6e140abdc21114c46a2ed490 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 2 Aug 2019 13:23:05 +0800 Subject: [PATCH 075/218] [add-topo] Add support for specifying PTF docker image tag (#1046) Signed-off-by: Xin Wang --- ansible/roles/vm_set/tasks/add_topo.yml | 16 +++++++++++++++- ansible/testbed-cli.sh | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ansible/roles/vm_set/tasks/add_topo.yml b/ansible/roles/vm_set/tasks/add_topo.yml index d80577c8999..88274ec4bb1 100644 --- a/ansible/roles/vm_set/tasks/add_topo.yml +++ b/ansible/roles/vm_set/tasks/add_topo.yml @@ -1,10 +1,24 @@ + +# The PTF image built from different branches may be incompatible. The ptf_imagetag variable added here is to +# support using different PTF images for different branches. When the ptf_imagetag variable is not specified, +# the PTF image with default "201811" tag will be used in this sonic-mgmt 201811 branch. 
When a different PTF +# image version is required, we can specify a different value for the ptf_imagetag variable somewhere to +# override the default value, for example, specify from command line: +# ./testbed-cli.sh add-topo - vault -e ptf_imagetag=myversion +# By using this practice, we suggest to add different tags for different PTF image versions in docker registry. +# And we suggest to add tag "201811" for PTF image built from the 201811 branch. +- name: Set default value for ptf_imagetag + set_fact: + ptf_imagetag: "201811" + when: ptf_imagetag is not defined + - name: Create a docker container ptf_{{ vm_set_name }} docker: registry: "{{ docker_registry_host }}" username: "{{ docker_registry_username }}" password: "{{ docker_registry_password }}" name: ptf_{{ vm_set_name }} - image: "{{ docker_registry_host }}/{{ ptf_imagename }}" + image: "{{ docker_registry_host }}/{{ ptf_imagename }}:{{ ptf_imagetag }}" pull: always state: reloaded net: none diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index 0979dd381c0..5d416b2ad63 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -34,6 +34,8 @@ function usage echo " $0 start-vms server-name vault-password-file -e autostart=yes" echo "To stop VMs on a server: $0 stop-vms 'server-name' ~/.password" echo "To deploy a topology on a server: $0 add-topo 'topo-name' ~/.password" + echo " Optional argument for add-topo:" + echo " -e ptf_imagetag= # Use PTF image with specified tag for creating PTF container" echo "To remove a topology on a server: $0 remove-topo 'topo-name' ~/.password" echo "To renumber a topology on a server: $0 renumber-topo 'topo-name' ~/.password" echo "To connect a topology: $0 connect-topo 'topo-name' ~/.password" From 454ed3c07d7d3b6efd417a7546dd985442c72d76 Mon Sep 17 00:00:00 2001 From: bbinxie Date: Fri, 2 Aug 2019 01:56:32 +0800 Subject: [PATCH 076/218] [dhcp_relay] Set Broadcast flag for all test packets (#1030) --- .../test/files/ptftests/dhcp_relay_test.py | 19 
+++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ansible/roles/test/files/ptftests/dhcp_relay_test.py b/ansible/roles/test/files/ptftests/dhcp_relay_test.py index 358d811805a..2b315418372 100644 --- a/ansible/roles/test/files/ptftests/dhcp_relay_test.py +++ b/ansible/roles/test/files/ptftests/dhcp_relay_test.py @@ -154,7 +154,7 @@ def tearDown(self): """ def create_dhcp_discover_packet(self): - return testutils.dhcp_discover_packet(eth_client=self.client_mac) + return testutils.dhcp_discover_packet(eth_client=self.client_mac, set_broadcast_bit=True) def create_dhcp_discover_relayed_packet(self): my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) @@ -183,7 +183,7 @@ def create_dhcp_discover_relayed_packet(self): hops=1, xid=0, secs=0, - flags=0, + flags=0x8000, ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.DEFAULT_ROUTE_IP, siaddr=self.DEFAULT_ROUTE_IP, @@ -212,7 +212,8 @@ def create_dhcp_offer_packet(self): ip_gateway=self.relay_iface_ip, netmask_client=self.client_subnet, dhcp_lease=self.LEASE_TIME, - padding_bytes=0) + padding_bytes=0, + set_broadcast_bit=True) def create_dhcp_offer_relayed_packet(self): my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) @@ -233,7 +234,7 @@ def create_dhcp_offer_relayed_packet(self): hops=0, xid=0, secs=0, - flags=0, + flags=0x8000, ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.client_ip, siaddr=self.server_ip, @@ -257,7 +258,8 @@ def create_dhcp_offer_relayed_packet(self): def create_dhcp_request_packet(self): return testutils.dhcp_request_packet(eth_client=self.client_mac, ip_server=self.server_ip, - ip_requested=self.client_ip) + ip_requested=self.client_ip, + set_broadcast_bit=True) def create_dhcp_request_relayed_packet(self): my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) @@ -279,7 +281,7 @@ def create_dhcp_request_relayed_packet(self): hops=1, xid=0, secs=0, - flags=0, + flags=0x8000, 
ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.DEFAULT_ROUTE_IP, siaddr=self.DEFAULT_ROUTE_IP, @@ -310,7 +312,8 @@ def create_dhcp_ack_packet(self): ip_gateway=self.relay_iface_ip, netmask_client=self.client_subnet, dhcp_lease=self.LEASE_TIME, - padding_bytes=0) + padding_bytes=0, + set_broadcast_bit=True) def create_dhcp_ack_relayed_packet(self): my_chaddr = ''.join([chr(int(octet, 16)) for octet in self.client_mac.split(':')]) @@ -331,7 +334,7 @@ def create_dhcp_ack_relayed_packet(self): hops=0, xid=0, secs=0, - flags=0, + flags=0x8000, ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.client_ip, siaddr=self.server_ip, From 374c5d1c0c627dadb45e6f8b71578365d55e785d Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 6 Aug 2019 02:33:25 +0800 Subject: [PATCH 077/218] [CRM] Wait more time for CRM stats to update (#1053) CRM testing may fail because CRM stats was not updated yet while checking the counters. Main purpose of this change is to increase the time waiting for CRM update from 2 seconds to 4 seconds. Two other improvements: * Decrease the time waiting for CRM counters update from 300 seconds to 30 seconds. The 300 seconds waiting is unnecessary. * Use a single redis-cli command to get used and available counters rather than two commands. 
Signed-off-by: Xin Wang --- ansible/roles/test/tasks/crm.yml | 3 +- .../test/tasks/crm/crm_test_acl_counter.yml | 41 +++++++++--------- .../test/tasks/crm/crm_test_acl_entry.yml | 38 +++++++--------- .../test/tasks/crm/crm_test_fdb_entry.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv4_neighbor.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv4_nexthop.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv4_route.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv6_neighbor.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv6_nexthop.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_ipv6_route.yml | 43 ++++++++----------- .../test/tasks/crm/crm_test_nexthop_group.yml | 37 ++++++++-------- .../crm/crm_test_nexthop_group_member.yml | 43 ++++++++----------- 12 files changed, 193 insertions(+), 270 deletions(-) diff --git a/ansible/roles/test/tasks/crm.yml b/ansible/roles/test/tasks/crm.yml index d9561013283..a502dcd19c1 100644 --- a/ansible/roles/test/tasks/crm.yml +++ b/ansible/roles/test/tasks/crm.yml @@ -16,12 +16,13 @@ - set_fact: ansible_date_time: "{{ansible_date_time}}" + crm_update_time: 4 - name: Set polling interval command: crm config polling interval 1 - name: Make sure CRM counters updated - pause: seconds=300 + pause: seconds=30 - name: Run test case "CRM IPv4 route resource" include: roles/test/tasks/crm/crm_test_ipv4_route.yml diff --git a/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml b/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml index 4ee805e8427..9b536a3ee6a 100644 --- a/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml +++ b/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml @@ -6,17 +6,22 @@ - name: Copy ACL JSON config to switch. 
copy: src=roles/test/tasks/crm/acl.json dest=/tmp + - name: Get original "crm_stats_acl_counter_available" counter value + command: redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_counter_available + register: out + - set_fact: original_crm_stats_acl_counter_available={{out.stdout}} + - name: Add ACL command: acl-loader update full /tmp/acl.json become: yes - name: Get ACL entry keys - command: bash -c "docker exec -i database redis-cli --raw -n 1 KEYS *SAI_OBJECT_TYPE_ACL_ENTRY*" + command: redis-cli --raw -n 1 KEYS *SAI_OBJECT_TYPE_ACL_ENTRY* register: out - set_fact: acl_tbl_keys={{out.stdout.split()}} - name: Get ethertype for ACL entry in order to match ACL which was configured - command: bash -c "docker exec -i database redis-cli -n 1 HGET {{item}} SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE" + command: redis-cli -n 1 HGET {{item}} SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE with_items: "{{acl_tbl_keys}}" register: out @@ -26,22 +31,19 @@ when: item.stdout|search("2048") - name: Get ACL table key - command: bash -c "docker exec -i database redis-cli -n 1 HGET {{key}} SAI_ACL_ENTRY_ATTR_TABLE_ID" + command: redis-cli -n 1 HGET {{key}} SAI_ACL_ENTRY_ATTR_TABLE_ID register: out - set_fact: acl_tbl_key={{"CRM:ACL_TABLE_STATS:{0}".format(out.stdout|replace("oid:", ""))}} - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_acl_counter_used" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_counter_used - register: out - - set_fact: new_crm_stats_acl_counter_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_acl_counter_available" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_counter_available + - name: Get new "crm_stats_acl_counter" used and available counter value + command: redis-cli --raw -n 2 HMGET {{acl_tbl_key}} crm_stats_acl_counter_used crm_stats_acl_counter_available register: out - - set_fact: 
new_crm_stats_acl_counter_available={{out.stdout}} + - set_fact: + new_crm_stats_acl_counter_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_acl_counter_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_acl_counter_used" counter was incremented assert: {that: "{{new_crm_stats_acl_counter_used|int - crm_stats_acl_counter_used|int == 2}}"} @@ -60,17 +62,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_acl_counter_used" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_counter_used - register: out - - set_fact: new_crm_stats_acl_counter_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_acl_counter_available" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_counter_available + - name: Get new "crm_stats_acl_counter" used and available counter value + command: redis-cli --raw -n 2 HMGET {{acl_tbl_key}} crm_stats_acl_counter_used crm_stats_acl_counter_available register: out - - set_fact: new_crm_stats_acl_counter_available={{out.stdout}} + - set_fact: + new_crm_stats_acl_counter_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_acl_counter_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_acl_counter_used" counter was decremented assert: {that: "{{new_crm_stats_acl_counter_used|int - crm_stats_acl_counter_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_acl_entry.yml b/ansible/roles/test/tasks/crm/crm_test_acl_entry.yml index 2c134567f8e..12d3968edb0 100644 --- a/ansible/roles/test/tasks/crm/crm_test_acl_entry.yml +++ b/ansible/roles/test/tasks/crm/crm_test_acl_entry.yml @@ -11,15 +11,15 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 + pause: seconds={{ crm_update_time }} - name: Get ACL entry keys - command: bash -c "docker exec -i database redis-cli --raw -n 1 KEYS *SAI_OBJECT_TYPE_ACL_ENTRY*" + 
command: redis-cli --raw -n 1 KEYS *SAI_OBJECT_TYPE_ACL_ENTRY* register: out - set_fact: acl_tbl_keys={{out.stdout.split()}} - name: Get ethertype for ACL entry in order to match ACL which was configured - command: bash -c "docker exec -i database redis-cli -n 1 HGET {{item}} SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE" + command: redis-cli -n 1 HGET {{item}} SAI_ACL_ENTRY_ATTR_FIELD_ETHER_TYPE with_items: "{{acl_tbl_keys}}" register: out @@ -29,22 +29,19 @@ when: item.stdout|search("2048") - name: Get ACL table key - command: bash -c "docker exec -i database redis-cli -n 1 HGET {{key}} SAI_ACL_ENTRY_ATTR_TABLE_ID" + command: redis-cli -n 1 HGET {{key}} SAI_ACL_ENTRY_ATTR_TABLE_ID register: out - set_fact: acl_tbl_key={{"CRM:ACL_TABLE_STATS:{0}".format(out.stdout|replace("oid:", ""))}} - name: Make sure CRM counters updated - pause: seconds=2 + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_acl_entry_used" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_entry_used + - name: Get new "crm_stats_acl_entry" used and available counter value + command: redis-cli --raw -n 2 HMGET {{acl_tbl_key}} crm_stats_acl_entry_used crm_stats_acl_entry_available register: out - - set_fact: new_crm_stats_acl_entry_used={{out.stdout}} - - - name: Get new "crm_stats_acl_entry_available" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_entry_available - register: out - - set_fact: new_crm_stats_acl_entry_available={{out.stdout}} + - set_fact: + new_crm_stats_acl_entry_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_acl_entry_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_acl_entry_used" counter was incremented assert: {that: "{{new_crm_stats_acl_entry_used|int - crm_stats_acl_entry_used|int == 2}}"} @@ -63,17 +60,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_acl_entry_used" counter value - command: 
docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_entry_used - register: out - - set_fact: new_crm_stats_acl_entry_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_acl_entry_available" counter value - command: docker exec -i database redis-cli -n 2 HGET {{acl_tbl_key}} crm_stats_acl_entry_available + - name: Get new "crm_stats_acl_entry" used and available counter value + command: redis-cli --raw -n 2 HMGET {{acl_tbl_key}} crm_stats_acl_entry_used crm_stats_acl_entry_available register: out - - set_fact: new_crm_stats_acl_entry_available={{out.stdout}} + - set_fact: + new_crm_stats_acl_entry_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_acl_entry_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_acl_entry_used" counter was decremented assert: {that: "{{new_crm_stats_acl_entry_used|int - crm_stats_acl_entry_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_fdb_entry.yml b/ansible/roles/test/tasks/crm/crm_test_fdb_entry.yml index 7abb952dd83..c4e80c25978 100644 --- a/ansible/roles/test/tasks/crm/crm_test_fdb_entry.yml +++ b/ansible/roles/test/tasks/crm/crm_test_fdb_entry.yml @@ -1,14 +1,11 @@ - block: - - name: Get "crm_stats_fdb_entry_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_used + - name: Get "crm_stats_fdb_entry" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_fdb_entry_used crm_stats_fdb_entry_available register: out - - set_fact: crm_stats_fdb_entry_used={{out.stdout}} - - - name: Get "crm_stats_fdb_entry_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_available - register: out - - set_fact: crm_stats_fdb_entry_available={{out.stdout}} + - set_fact: + crm_stats_fdb_entry_used: "{{ out.stdout_lines[0] }}" + crm_stats_fdb_entry_available: "{{ out.stdout_lines[1] }}" - name: Copy FDB JSON config to 
switch. copy: src=roles/test/tasks/crm/fdb.json dest=/tmp @@ -28,17 +25,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_fdb_entry_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_used - register: out - - set_fact: new_crm_stats_fdb_entry_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_fdb_entry_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_available + - name: Get new "crm_stats_fdb_entry" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_fdb_entry_used crm_stats_fdb_entry_available register: out - - set_fact: new_crm_stats_fdb_entry_available={{out.stdout}} + - set_fact: + new_crm_stats_fdb_entry_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_fdb_entry_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_fdb_entry_used" counter was incremented assert: {that: "{{new_crm_stats_fdb_entry_used|int - crm_stats_fdb_entry_used|int == 1}}"} @@ -57,17 +51,14 @@ command: fdbclear - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_fdb_entry_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_used - register: out - - set_fact: new_crm_stats_fdb_entry_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_fdb_entry_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_fdb_entry_available + - name: Get new "crm_stats_fdb_entry" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_fdb_entry_used crm_stats_fdb_entry_available register: out - - set_fact: new_crm_stats_fdb_entry_available={{out.stdout}} + - set_fact: + new_crm_stats_fdb_entry_used: "{{ out.stdout_lines[0] }}" + 
new_crm_stats_fdb_entry_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_fdb_entry_used" counter was decremented assert: {that: "{{new_crm_stats_fdb_entry_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml b/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml index 8e5740b994c..62221ec2f11 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml @@ -1,31 +1,25 @@ - block: - - name: Get "crm_stats_ipv4_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_used + - name: Get "crm_stats_ipv4_neighbor" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_neighbor_used crm_stats_ipv4_neighbor_available register: out - - set_fact: crm_stats_ipv4_neighbor_used={{out.stdout}} - - - name: Get "crm_stats_ipv4_neighbor_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_available - register: out - - set_fact: crm_stats_ipv4_neighbor_available={{out.stdout}} + - set_fact: + crm_stats_ipv4_neighbor_used: "{{ out.stdout_lines[0] }}" + crm_stats_ipv4_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Add IPv4 neighbor command: ip neigh replace 2.2.2.2 lladdr 11:22:33:44:55:66 dev {{crm_intf}} become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv4_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_used - register: out - - set_fact: new_crm_stats_ipv4_neighbor_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_neighbor_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_available + - name: Get new "crm_stats_ipv4_neighbor" used and available counter value + command: 
redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_neighbor_used crm_stats_ipv4_neighbor_available register: out - - set_fact: new_crm_stats_ipv4_neighbor_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_neighbor_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_neighbor_used" counter was incremented assert: {that: "{{new_crm_stats_ipv4_neighbor_used|int - crm_stats_ipv4_neighbor_used|int >= 1}}"} @@ -38,17 +32,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv4_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_used - register: out - - set_fact: new_crm_stats_ipv4_neighbor_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_neighbor_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_neighbor_available + - name: Get new "crm_stats_ipv4_neighbor" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_neighbor_used crm_stats_ipv4_neighbor_available register: out - - set_fact: new_crm_stats_ipv4_neighbor_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_neighbor_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_neighbor_used" counter was decremented assert: {that: "{{new_crm_stats_ipv4_neighbor_used|int - crm_stats_ipv4_neighbor_used|int >= 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml b/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml index 3280e98bd98..733341268c5 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml @@ -1,31 +1,25 @@ - block: - - name: Get "crm_stats_ipv4_nexthop_used" counter value - command: 
docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_used + - name: Get "crm_stats_ipv4_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_nexthop_used crm_stats_ipv4_nexthop_available register: out - - set_fact: crm_stats_ipv4_nexthop_used={{out.stdout}} - - - name: Get "crm_stats_ipv4_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_available - register: out - - set_fact: crm_stats_ipv4_nexthop_available={{out.stdout}} + - set_fact: + crm_stats_ipv4_nexthop_used: "{{ out.stdout_lines[0] }}" + crm_stats_ipv4_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Add IPv4 nexthop command: ip neigh replace 2.2.2.2 lladdr 11:22:33:44:55:66 dev {{crm_intf}} become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv4_nexthop_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_used - register: out - - set_fact: new_crm_stats_ipv4_nexthop_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_available + - name: Get new "crm_stats_ipv4_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_nexthop_used crm_stats_ipv4_nexthop_available register: out - - set_fact: new_crm_stats_ipv4_nexthop_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_nexthop_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_nexthop_used" counter was incremented assert: {that: "{{new_crm_stats_ipv4_nexthop_used|int - crm_stats_ipv4_nexthop_used|int == 1}}"} @@ -38,17 +32,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: 
Get new "crm_stats_ipv4_nexthop_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_used - register: out - - set_fact: new_crm_stats_ipv4_nexthop_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_nexthop_available + - name: Get new "crm_stats_ipv4_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_nexthop_used crm_stats_ipv4_nexthop_available register: out - - set_fact: new_crm_stats_ipv4_nexthop_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_nexthop_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_nexthop_used" counter was decremented assert: {that: "{{new_crm_stats_ipv4_nexthop_used|int - crm_stats_ipv4_nexthop_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv4_route.yml b/ansible/roles/test/tasks/crm/crm_test_ipv4_route.yml index 1af11bddedc..5fad99aa95a 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv4_route.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv4_route.yml @@ -1,14 +1,11 @@ - block: - - name: Get "crm_stats_ipv4_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_used + - name: Get "crm_stats_ipv4_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_route_used crm_stats_ipv4_route_available register: out - - set_fact: crm_stats_ipv4_route_used={{out.stdout}} - - - name: Get "crm_stats_ipv4_route_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_available - register: out - - set_fact: crm_stats_ipv4_route_available={{out.stdout}} + - set_fact: + crm_stats_ipv4_route_used: "{{ out.stdout_lines[0] 
}}" + crm_stats_ipv4_route_available: "{{ out.stdout_lines[1] }}" - name: Get NH IP command: ip -4 neigh show dev {{crm_intf}} nud reachable nud stale @@ -20,17 +17,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv4_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_used - register: out - - set_fact: new_crm_stats_ipv4_route_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_route_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_available + - name: Get new "crm_stats_ipv4_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_route_used crm_stats_ipv4_route_available register: out - - set_fact: new_crm_stats_ipv4_route_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_route_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_route_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_route_used" counter was incremented assert: {that: "{{new_crm_stats_ipv4_route_used|int - crm_stats_ipv4_route_used|int == 1}}"} @@ -43,17 +37,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv4_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_used - register: out - - set_fact: new_crm_stats_ipv4_route_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv4_route_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv4_route_available + - name: Get new "crm_stats_ipv4_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv4_route_used crm_stats_ipv4_route_available register: out - - set_fact: 
new_crm_stats_ipv4_route_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv4_route_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv4_route_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_route_used" counter was decremented assert: {that: "{{new_crm_stats_ipv4_route_used|int - crm_stats_ipv4_route_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml b/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml index 4d0879bd49e..58f8b8aa67f 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml @@ -1,31 +1,25 @@ - block: - - name: Get "crm_stats_ipv6_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_used + - name: Get "crm_stats_ipv6_neighbor" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_neighbor_used crm_stats_ipv6_neighbor_available register: out - - set_fact: crm_stats_ipv6_neighbor_used={{out.stdout}} - - - name: Get "crm_stats_ipv6_neighbor_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_available - register: out - - set_fact: crm_stats_ipv6_neighbor_available={{out.stdout}} + - set_fact: + crm_stats_ipv6_neighbor_used: "{{ out.stdout_lines[0] }}" + crm_stats_ipv6_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Add IPv6 neighbor command: ip neigh replace 2001::1 lladdr 11:22:33:44:55:66 dev {{crm_intf}} become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_used - register: out - - set_fact: new_crm_stats_ipv6_neighbor_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_neighbor_available" counter value - command: docker exec 
-i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_available + - name: Get new "crm_stats_ipv6_neighbor" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_neighbor_used crm_stats_ipv6_neighbor_available register: out - - set_fact: new_crm_stats_ipv6_neighbor_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_neighbor_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_neighbor_used" counter was incremented assert: {that: "{{new_crm_stats_ipv6_neighbor_used|int - crm_stats_ipv6_neighbor_used|int >= 1}}"} @@ -38,17 +32,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_neighbor_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_used - register: out - - set_fact: new_crm_stats_ipv6_neighbor_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_neighbor_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_neighbor_available + - name: Get new "crm_stats_ipv6_neighbor" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_neighbor_used crm_stats_ipv6_neighbor_available register: out - - set_fact: new_crm_stats_ipv6_neighbor_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_neighbor_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_neighbor_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_neighbor_used" counter was decremented assert: {that: "{{new_crm_stats_ipv6_neighbor_used|int - crm_stats_ipv6_neighbor_used|int >= 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml b/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml index f253302e61e..15147374908 100644 --- 
a/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml @@ -1,31 +1,25 @@ - block: - - name: Get "crm_stats_ipv6_nexthop_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_used + - name: Get "crm_stats_ipv6_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_nexthop_used crm_stats_ipv6_nexthop_available register: out - - set_fact: crm_stats_ipv6_nexthop_used={{out.stdout}} - - - name: Get "crm_stats_ipv6_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_available - register: out - - set_fact: crm_stats_ipv6_nexthop_available={{out.stdout}} + - set_fact: + crm_stats_ipv6_nexthop_used: "{{ out.stdout_lines[0] }}" + crm_stats_ipv6_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Add IPv6 nexthop command: ip neigh replace 2001::1 lladdr 11:22:33:44:55:66 dev {{crm_intf}} become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_nexthop_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_used - register: out - - set_fact: new_crm_stats_ipv6_nexthop_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_available + - name: Get new "crm_stats_ipv6_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_nexthop_used crm_stats_ipv6_nexthop_available register: out - - set_fact: new_crm_stats_ipv6_nexthop_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_nexthop_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_nexthop_used" counter 
was incremented assert: {that: "{{new_crm_stats_ipv6_nexthop_used|int - crm_stats_ipv6_nexthop_used|int == 1}}"} @@ -38,17 +32,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_nexthop_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_used - register: out - - set_fact: new_crm_stats_ipv6_nexthop_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_nexthop_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_nexthop_available + - name: Get new "crm_stats_ipv6_nexthop" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_nexthop_used crm_stats_ipv6_nexthop_available register: out - - set_fact: new_crm_stats_ipv6_nexthop_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_nexthop_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_nexthop_used" counter was decremented assert: {that: "{{new_crm_stats_ipv6_nexthop_used|int - crm_stats_ipv6_nexthop_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml b/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml index c4ebacfd6d9..5cf7b662b97 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml @@ -1,14 +1,11 @@ - block: - - name: Get "crm_stats_ipv6_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_used + - name: Get "crm_stats_ipv6_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_route_used crm_stats_ipv6_route_available register: out - - set_fact: crm_stats_ipv6_route_used={{out.stdout}} - - - name: Get "crm_stats_ipv6_route_available" counter value - command: docker exec 
-i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_available - register: out - - set_fact: crm_stats_ipv6_route_available={{out.stdout}} + - set_fact: + crm_stats_ipv6_route_used: "{{ out.stdout_lines[0] }}" + crm_stats_ipv6_route_available: "{{ out.stdout_lines[1] }}" - name: Get NH IP shell: ip -6 neigh show dev {{crm_intf}} nud reachable nud stale | grep -v fe80 @@ -20,17 +17,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_used - register: out - - set_fact: new_crm_stats_ipv6_route_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_route_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_available + - name: Get new "crm_stats_ipv6_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_route_used crm_stats_ipv6_route_available register: out - - set_fact: new_crm_stats_ipv6_route_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_route_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_route_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_route_used" counter was incremented assert: {that: "{{new_crm_stats_ipv6_route_used|int - crm_stats_ipv6_route_used|int == 1}}"} @@ -43,17 +37,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_ipv6_route_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_used - register: out - - set_fact: new_crm_stats_ipv6_route_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_ipv6_route_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_ipv6_route_available + - name: Get new 
"crm_stats_ipv6_route" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_ipv6_route_used crm_stats_ipv6_route_available register: out - - set_fact: new_crm_stats_ipv6_route_available={{out.stdout}} + - set_fact: + new_crm_stats_ipv6_route_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_ipv6_route_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_route_used" counter was decremented assert: {that: "{{new_crm_stats_ipv6_route_used|int - crm_stats_ipv6_route_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_nexthop_group.yml b/ansible/roles/test/tasks/crm/crm_test_nexthop_group.yml index 38d30cd7db5..6325064fbaf 100644 --- a/ansible/roles/test/tasks/crm/crm_test_nexthop_group.yml +++ b/ansible/roles/test/tasks/crm/crm_test_nexthop_group.yml @@ -1,14 +1,11 @@ - block: - - name: Get "crm_stats_nexthop_group_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_used - register: out - - set_fact: crm_stats_nexthop_group_used={{out.stdout}} - - - name: Get "crm_stats_nexthop_group_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_available + - name: Get "crm_stats_nexthop_group" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_used crm_stats_nexthop_group_available register: out - - set_fact: crm_stats_nexthop_group_available={{out.stdout}} + - set_fact: + crm_stats_nexthop_group_used: "{{ out.stdout_lines[0] }}" + crm_stats_nexthop_group_available: "{{ out.stdout_lines[1] }}" - name: Get NH IP 1 command: ip -4 neigh show dev {{crm_intf}} nud reachable nud stale @@ -25,7 +22,7 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 + pause: seconds={{ crm_update_time }} - name: Get new "crm_stats_nexthop_group_used" counter value command: docker exec -i database redis-cli -n 2 HGET CRM:STATS 
crm_stats_nexthop_group_used @@ -37,6 +34,13 @@ register: out - set_fact: new_crm_stats_nexthop_group_available={{out.stdout}} + - name: Get new "crm_stats_nexthop_group" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_used crm_stats_nexthop_group_available + register: out + - set_fact: + new_crm_stats_nexthop_group_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_nexthop_group_available: "{{ out.stdout_lines[1] }}" + - name: Verify "crm_stats_nexthop_group_used" counter was incremented assert: {that: "{{new_crm_stats_nexthop_group_used|int - crm_stats_nexthop_group_used|int == 1}}"} @@ -48,17 +52,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_nexthop_group_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_used - register: out - - set_fact: new_crm_stats_nexthop_group_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_nexthop_group_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_available + - name: Get new "crm_stats_nexthop_group" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_used crm_stats_nexthop_group_available register: out - - set_fact: new_crm_stats_nexthop_group_available={{out.stdout}} + - set_fact: + new_crm_stats_nexthop_group_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_nexthop_group_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_nexthop_group_used" counter was decremented assert: {that: "{{new_crm_stats_nexthop_group_used|int - crm_stats_nexthop_group_used|int == 0}}"} diff --git a/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml b/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml index 6181b87a0c3..1a24af51da8 100644 --- 
a/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml +++ b/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml @@ -1,14 +1,11 @@ - block: - - name: Get "crm_stats_nexthop_group_member_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_used + - name: Get "crm_stats_nexthop_group_member" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_member_used crm_stats_nexthop_group_member_available register: out - - set_fact: crm_stats_nexthop_group_member_used={{out.stdout}} - - - name: Get "crm_stats_nexthop_group_member_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_available - register: out - - set_fact: crm_stats_nexthop_group_member_available={{out.stdout}} + - set_fact: + crm_stats_nexthop_group_member_used: "{{ out.stdout_lines[0] }}" + crm_stats_nexthop_group_member_available: "{{ out.stdout_lines[1] }}" - name: Get NH IP 1 command: ip -4 neigh show dev {{crm_intf}} nud reachable nud stale @@ -25,17 +22,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_nexthop_group_member_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_used - register: out - - set_fact: new_crm_stats_nexthop_group_member_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_nexthop_group_member_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_available + - name: Get new "crm_stats_nexthop_group_member" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_member_used crm_stats_nexthop_group_member_available register: out - - set_fact: new_crm_stats_nexthop_group_member_available={{out.stdout}} + - set_fact: + 
new_crm_stats_nexthop_group_member_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_nexthop_group_member_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_nexthop_group_member_used" counter was incremented assert: {that: "{{new_crm_stats_nexthop_group_member_used|int - crm_stats_nexthop_group_member_used|int == 2}}"} @@ -48,17 +42,14 @@ become: yes - name: Make sure CRM counters updated - pause: seconds=2 - - - name: Get new "crm_stats_nexthop_group_member_used" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_used - register: out - - set_fact: new_crm_stats_nexthop_group_member_used={{out.stdout}} + pause: seconds={{ crm_update_time }} - - name: Get new "crm_stats_nexthop_group_member_available" counter value - command: docker exec -i database redis-cli -n 2 HGET CRM:STATS crm_stats_nexthop_group_member_available + - name: Get new "crm_stats_nexthop_group_member" used and available counter value + command: redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_nexthop_group_member_used crm_stats_nexthop_group_member_available register: out - - set_fact: new_crm_stats_nexthop_group_member_available={{out.stdout}} + - set_fact: + new_crm_stats_nexthop_group_member_used: "{{ out.stdout_lines[0] }}" + new_crm_stats_nexthop_group_member_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_nexthop_group_member_used" counter was decremented assert: {that: "{{new_crm_stats_nexthop_group_member_used|int - crm_stats_nexthop_group_member_used|int == 0}}"} From 3a008e936629288b19edf4e850fa98af65e3b8a4 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Wed, 15 May 2019 18:58:57 -0700 Subject: [PATCH 078/218] Improve vxlan decap test. Make a test after vxlan removal (#914) * Improve vxlan decap test. 
Make a test after vxlan removal * Increase timeout time for vxlan-decap test * Use docker exec to run redis-cli --- .../roles/test/files/ptftests/vxlan-decap.py | 6 ++--- ansible/roles/test/tasks/vxlan-decap.yml | 25 +++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index 005e3132d47..389461f51f7 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -228,7 +228,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.1) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) rv = rv and (nr_rcvd == self.nr) return rv @@ -257,7 +257,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.1) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) rv = rv and (nr_rcvd == self.nr) return rv @@ -291,7 +291,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.1) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) rv = rv and (nr_rcvd == self.nr) return rv diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index 707a4c9d241..c4439c9bc7f 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -44,7 +44,7 @@ - include: ptf_runner.yml vars: - ptf_test_name: Vxlan decap test + ptf_test_name: 
Vxlan decap test - No vxlan configuration ptf_test_dir: ptftests ptf_test_path: vxlan-decap.Vxlan ptf_platform: remote @@ -61,7 +61,7 @@ - include: ptf_runner.yml vars: - ptf_test_name: Vxlan decap test + ptf_test_name: Vxlan decap test - vxlan configuration applied ptf_test_dir: ptftests ptf_test_path: vxlan-decap.Vxlan ptf_platform: remote @@ -71,3 +71,24 @@ - vxlan_enabled=True - config_file='/tmp/vxlan_decap.json' - count=1 + + - name: Remove vxlan tunnel map configuration for {{ item }} + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnel{{ item }}|map1" + with_items: minigraph_vlans + + - name: Remove vxlan tunnel configuration for {{ item }} + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnel{{ item }}" + with_items: minigraph_vlans + + - include: ptf_runner.yml + vars: + ptf_test_name: Vxlan decap test - vxlan configuration removed + ptf_test_dir: ptftests + ptf_test_path: vxlan-decap.Vxlan + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_qlen: 1000 + ptf_test_params: + - vxlan_enabled=False + - config_file='/tmp/vxlan_decap.json' + - repetitions=1 From a2acb9266727222bef61e108b4c0844bf2c85a01 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Fri, 2 Aug 2019 12:46:40 -0700 Subject: [PATCH 079/218] Add extended statistics output for vxlan-decap test (#1049) --- .../roles/test/files/ptftests/vxlan-decap.py | 107 +++++++++++------- 1 file changed, 68 insertions(+), 39 deletions(-) diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index 389461f51f7..990373b7b42 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -24,7 +24,9 @@ from ptf.mask import Mask import datetime import subprocess +import traceback from pprint import pprint +from pprint import pformat class Vxlan(BaseTest): def __init__(self): @@ -162,49 +164,66 @@ def tearDown(self): def runTest(self): print - for 
test in self.tests: - print test['name'] - res_v = self.Vxlan(test) - print " Vxlan = ", res_v - res_f = self.RegularLAGtoVLAN(test) - print " RegularLAGtoVLAN = ", res_f - res_t = self.RegularVLANtoLAG(test) - print " RegularVLANtoLAG = ", res_t + err = '' + trace = '' + ret = 0 + try: + for test in self.tests: + print test['name'] + res_v, out_v = self.Vxlan(test) + print " Vxlan = ", res_v + res_f, out_f = self.RegularLAGtoVLAN(test) + print " RegularLAGtoVLAN = ", res_f + res_t, out_t = self.RegularVLANtoLAG(test) + print " RegularVLANtoLAG = ", res_t + print + if self.vxlan_enabled: + self.assertTrue(res_v, "VxlanTest failed:\n %s\n\ntest:\n%s" % (out_v, pformat(test))) + else: + self.assertFalse(res_v, "VxlanTest: vxlan works, but it must have been disabled!\n\ntest:%s" % pformat(test)) + self.assertTrue(res_f, "RegularLAGtoVLAN test failed:\n %s\n\ntest:\n%s" % (out_f, pformat(test))) + self.assertTrue(res_t, "RegularVLANtoLAG test failed:\n %s\n\ntest:\n%s" % (out_t, pformat(test))) + except AssertionError as e: + err = str(e) + trace = traceback.format_exc() + ret = -1 + if ret != 0: + print "The test failed" print - if self.vxlan_enabled: - self.assertTrue(res_v, "VxlanTest failed") - else: - self.assertFalse(res_v, "VxlanTest must be disabled") - self.assertTrue(res_f, "RegularLAGtoVLAN test failed") - self.assertTrue(res_t, "RegularVLANtoLAG test failed") + print "Error: %s" % err + print + print trace + else: + print "The test was successful" + sys.stdout.flush() + if ret != 0: + raise AssertionError(err) def Vxlan(self, test): - rv = True for n in self.net_ports: for a in test['acc_ports']: - res = self.checkVxlan(a, n, test) - rv = rv and res - - return rv + res, out = self.checkVxlan(a, n, test) + if not res: + return False, out + return True, "" def RegularLAGtoVLAN(self, test): - rv = True for n in self.net_ports: for a in test['acc_ports']: - res = self.checkRegularRegularLAGtoVLAN(a, n, test) - rv = rv and res - return rv + res, out = 
self.checkRegularRegularLAGtoVLAN(a, n, test) + if not res: + return False, out + return True, "" def RegularVLANtoLAG(self, test): - rv = True for dst, ports in self.pc_info: for a in test['acc_ports']: - res = self.checkRegularRegularVLANtoLAG(a, ports, dst, test) - rv = rv and res - return rv + res, out = self.checkRegularRegularVLANtoLAG(a, ports, dst, test) + if not res: + return False, out + return True, "" def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): - rv = True src_mac = self.ptf_mac_addrs['eth%d' % acc_port] dst_mac = self.dut_mac src_ip = test['vlan_ip_prefix'] % acc_port @@ -228,13 +247,16 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) - rv = rv and (nr_rcvd == self.nr) - return rv + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) + rv = nr_rcvd == self.nr + out = "" + if not rv: + arg = self.nr, nr_rcvd, str(acc_port), str(pc_ports), src_mac, dst_mac, src_ip, dst_ip + out = "sent = %d rcvd = %d | src_port=%s dst_ports=%s | src_mac=%s dst_mac=%s src_ip=%s dst_ip=%s" % arg + return rv, out def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): - rv = True src_mac = self.random_mac dst_mac = self.dut_mac src_ip = test['src_ip'] @@ -257,12 +279,15 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) - rv = rv and (nr_rcvd == self.nr) - return rv + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) + rv = nr_rcvd == self.nr + out = "" + if not rv: + arg = self.nr, nr_rcvd, str(net_port), str(acc_port), src_mac, dst_mac, src_ip, dst_ip + out = "sent = %d rcvd = %d | src_port=%s 
dst_port=%s | src_mac=%s dst_mac=%s src_ip=%s dst_ip=%s" % arg + return rv, out def checkVxlan(self, acc_port, net_port, test): - rv = True inner_dst_mac = self.ptf_mac_addrs['eth%d' % acc_port] inner_src_mac = self.dut_mac inner_src_ip = test['vlan_gw'] @@ -291,8 +316,12 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) - rv = rv and (nr_rcvd == self.nr) - return rv + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) + rv = nr_rcvd == self.nr + out = "" + if not rv: + arg = self.nr, nr_rcvd, str(net_port), str(acc_port), src_mac, dst_mac, test['src_ip'], ip_dst, inner_src_mac, inner_dst_mac, inner_src_ip, inner_dst_ip, test['vni'] + out = "sent = %d rcvd = %d | src_port=%s dst_port=%s | src_mac=%s dst_mac=%s src_ip=%s dst_ip=%s | Inner: src_mac=%s dst_mac=%s src_ip=%s dst_ip=%s vni=%s" % arg + return rv, out From a26bca899a8c731bb9c285485ed0c4e2aa60f347 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Thu, 8 Aug 2019 14:29:49 -0700 Subject: [PATCH 080/218] [vxlan decap]: Use one tunnel with N tunnel maps. (#1057) * Improvements: 1. start arp_responder just once. 2. 
Use one tunnel two tunnel maps approach --- .../roles/test/files/ptftests/vxlan-decap.py | 13 ++---- ansible/roles/test/tasks/vxlan-decap.yml | 41 +++++++++++++++---- ansible/roles/test/templates/vxlan_db.json.j2 | 15 ------- .../test/templates/vxlan_db.maps.json.j2 | 9 ++++ .../test/templates/vxlan_db.tunnel.json.j2 | 8 ++++ 5 files changed, 54 insertions(+), 32 deletions(-) delete mode 100644 ansible/roles/test/templates/vxlan_db.json.j2 create mode 100644 ansible/roles/test/templates/vxlan_db.maps.json.j2 create mode 100644 ansible/roles/test/templates/vxlan_db.tunnel.json.j2 diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index 990373b7b42..e20b0a15687 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -104,14 +104,13 @@ def setUp(self): self.tests = [] vni_base = 336 - src_ip = "8.8.%d.%d" for name, data in graph['minigraph_vlans'].items(): test = {} test['name'] = name test['acc_ports'] = [graph['minigraph_port_indices'][member] for member in data['members']] vlan_id = int(name.replace('Vlan', '')) test['vni'] = vni_base + vlan_id - test['src_ip'] = src_ip % (vlan_id / 256, vlan_id % 254 + 1) + test['src_ip'] = "8.8.8.8" gw = None prefixlen = None @@ -151,15 +150,11 @@ def setUp(self): self.generate_ArpResponderConfig() - self.cmd(["supervisorctl", "start", "arp_responder"]) - self.dataplane.flush() return def tearDown(self): - self.cmd(["supervisorctl", "stop", "arp_responder"]) - return def runTest(self): @@ -247,7 +242,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: @@ -279,7 +274,7 @@ def 
checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: @@ -316,7 +311,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index c4439c9bc7f..a5750e84382 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -34,14 +34,24 @@ shell: supervisorctl update delegate_to: "{{ ptf_host }}" + - name: Start arpresponder + supervisorctl: state=restarted name=arp_responder + delegate_to: "{{ ptf_host }}" + - name: Render DUT parameters to json file for the test template: src=vxlan_decap.json.j2 dest=/tmp/vxlan_decap.json delegate_to: "{{ ptf_host }}" - - name: Render DUT vxlan configuration - template: src=vxlan_db.json.j2 dest=/tmp/vxlan_db.{{ item }}.json + - name: Render DUT vxlan configuration. Tunnel + template: src=vxlan_db.tunnel.json.j2 dest=/tmp/vxlan_db.tunnel.json + + - name: Render DUT vxlan configuration. 
Tunnel Maps + template: src=vxlan_db.maps.json.j2 dest=/tmp/vxlan_db.maps.{{ item }}.json with_items: minigraph_vlans + - name: Wait for some time until arp cache is ready + pause: seconds=50 + - include: ptf_runner.yml vars: ptf_test_name: Vxlan decap test - No vxlan configuration @@ -55,8 +65,11 @@ - config_file='/tmp/vxlan_decap.json' - repetitions=1 - - name: Configure vxlan decap for {{ item }} - shell: sonic-cfggen -j /tmp/vxlan_db.{{ item }}.json --write-to-db + - name: Configure vxlan decap tunnel + shell: sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db + + - name: Configure vxlan decap tunnel map for {{ item }} + shell: sonic-cfggen -j /tmp/vxlan_db.maps.{{ item }}.json --write-to-db with_items: minigraph_vlans - include: ptf_runner.yml @@ -73,12 +86,11 @@ - count=1 - name: Remove vxlan tunnel map configuration for {{ item }} - shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnel{{ item }}|map1" + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" with_items: minigraph_vlans - - name: Remove vxlan tunnel configuration for {{ item }} - shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnel{{ item }}" - with_items: minigraph_vlans + - name: Remove vxlan tunnel configuration + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" - include: ptf_runner.yml vars: @@ -92,3 +104,16 @@ - vxlan_enabled=False - config_file='/tmp/vxlan_decap.json' - repetitions=1 + + +- always: + - name: Remove vxlan tunnel map configuration for {{ item }} + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" + with_items: minigraph_vlans + + - name: Remove vxlan tunnel configuration + shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" + + - name: Stop arpresponder + supervisorctl: state=stopped name=arp_responder + delegate_to: "{{ ptf_host }}" diff --git 
a/ansible/roles/test/templates/vxlan_db.json.j2 b/ansible/roles/test/templates/vxlan_db.json.j2 deleted file mode 100644 index 697c3aba3e6..00000000000 --- a/ansible/roles/test/templates/vxlan_db.json.j2 +++ /dev/null @@ -1,15 +0,0 @@ -{ - "VXLAN_TUNNEL": { - "tunnel{{ item }}": { - "src_ip": "{{ minigraph_lo_interfaces[0]['addr'] }}", - "dst_ip": "8.8.{{ item | replace("Vlan", "") | int // 256 }}.{{ item | replace("Vlan", "") | int % 254 + 1 }}" - } - }, - "VXLAN_TUNNEL_MAP": { - "tunnel{{ item }}|map1": { - "vni": "{{ item | replace("Vlan", "") | int + 336 }}", - "vlan": "{{ item }}" - } - } -} - diff --git a/ansible/roles/test/templates/vxlan_db.maps.json.j2 b/ansible/roles/test/templates/vxlan_db.maps.json.j2 new file mode 100644 index 00000000000..1be0cf7c6ea --- /dev/null +++ b/ansible/roles/test/templates/vxlan_db.maps.json.j2 @@ -0,0 +1,9 @@ +{ + "VXLAN_TUNNEL_MAP": { + "tunnelVxlan|map{{ item }}": { + "vni": "{{ item | replace("Vlan", "") | int + 336 }}", + "vlan": "{{ item }}" + } + } +} + diff --git a/ansible/roles/test/templates/vxlan_db.tunnel.json.j2 b/ansible/roles/test/templates/vxlan_db.tunnel.json.j2 new file mode 100644 index 00000000000..f4671fe6e21 --- /dev/null +++ b/ansible/roles/test/templates/vxlan_db.tunnel.json.j2 @@ -0,0 +1,8 @@ +{ + "VXLAN_TUNNEL": { + "tunnelVxlan": { + "src_ip": "{{ minigraph_lo_interfaces[0]['addr'] }}", + "dst_ip": "8.8.8.8" + } + } +} From 7ba1287ed45ffde8a2f5d84c3a22e50abb2dfafb Mon Sep 17 00:00:00 2001 From: Samuel Angebault Date: Tue, 9 Apr 2019 07:49:14 -0700 Subject: [PATCH 081/218] Rename fan driver for Arista 7170-64C and 7260CX3-64 (#861) --- ansible/group_vars/sonic/sku-sensors-data.yml | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index 5f32c2d7b47..1741fc1c626 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml 
@@ -1016,10 +1016,10 @@ sensors_checks: - pmbus-i2c-4-58/fan1/fan1_alarm - pmbus-i2c-3-58/fan1/fan1_fault - pmbus-i2c-4-58/fan1/fan1_fault - - rook_cpld-i2c-85-60/fan1/fan1_fault - - rook_cpld-i2c-85-60/fan2/fan2_fault - - rook_cpld-i2c-85-60/fan3/fan3_fault - - rook_cpld-i2c-85-60/fan4/fan4_fault + - la_cpld-i2c-85-60/fan1/fan1_fault + - la_cpld-i2c-85-60/fan2/fan2_fault + - la_cpld-i2c-85-60/fan3/fan3_fault + - la_cpld-i2c-85-60/fan4/fan4_fault power: - pmbus-i2c-3-58/iin/curr1_max_alarm - pmbus-i2c-3-58/iout1/curr2_max_alarm @@ -1089,10 +1089,10 @@ sensors_checks: fan: - pmbus-i2c-3-58/fan1/fan1_input - pmbus-i2c-4-58/fan1/fan1_input - - rook_cpld-i2c-85-60/fan1/fan1_input - - rook_cpld-i2c-85-60/fan2/fan2_input - - rook_cpld-i2c-85-60/fan3/fan3_input - - rook_cpld-i2c-85-60/fan4/fan4_input + - la_cpld-i2c-85-60/fan1/fan1_input + - la_cpld-i2c-85-60/fan2/fan2_input + - la_cpld-i2c-85-60/fan3/fan3_input + - la_cpld-i2c-85-60/fan4/fan4_input power: [] temp: [] @@ -1791,9 +1791,13 @@ sensors_checks: Arista-7170-64C: alarms: - fan: + fan: - dps1900-i2c-6-58/fan1/fan1_alarm - dps1900-i2c-7-58/fan1/fan1_alarm + - la_cpld-i2c-93-60/fan1/fan1_fault + - la_cpld-i2c-93-60/fan2/fan2_fault + - la_cpld-i2c-93-60/fan3/fan3_fault + - la_cpld-i2c-93-60/fan4/fan4_fault power: - dps1900-i2c-6-58/iin/curr1_max_alarm - dps1900-i2c-6-58/iout1/curr2_crit_alarm @@ -1856,10 +1860,10 @@ sensors_checks: non_zero: fan: - - rook_cpld-i2c-93-60/fan1/fan1_input - - rook_cpld-i2c-93-60/fan2/fan2_input - - rook_cpld-i2c-93-60/fan3/fan3_input - - rook_cpld-i2c-93-60/fan4/fan4_input + - la_cpld-i2c-93-60/fan1/fan1_input + - la_cpld-i2c-93-60/fan2/fan2_input + - la_cpld-i2c-93-60/fan3/fan3_input + - la_cpld-i2c-93-60/fan4/fan4_input power: [] temp: [] From 1dc9cf744d67e27fcbe1ce7f685584890cb004f8 Mon Sep 17 00:00:00 2001 From: Gord Chen Date: Fri, 16 Aug 2019 07:04:07 +0800 Subject: [PATCH 082/218] [Config Test] swss exit after remove port channel due to SAI not found intf (#1028) When executing 
the config test case, it has the possibility to cause swss exit. It is due to the intfOrch may be slower than portOrch When the issue not happens, the timeline would be 1. Playbook remove IP from port channel intfOrch want to remove l3 intf, but neighOrch still have reference and the intfOrch will wait to next run 2. neighOrch decrease the reference count neighOrch finish the reference count decrement and then intfOrch remove l3 intf to SAI 3. Playbook remove port channel portOrch remove this port in SAI When the issue happens, the timeline would be 1. Playbook remove IP from port channel intfOrch want to remove l3 intf, but neighOrch still have reference and the intfOrch will wait to next run 2. Playbook remove port channel portOrch remove this port and its l3 intf in SAI 3. neighOrch decrease the reference count neighOrch finish the reference count decrement and then intfOrch remove l3 intf to SAI whcih was removed by step 2. Then the swss will exception --- ansible/roles/test/tasks/config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/roles/test/tasks/config.yml b/ansible/roles/test/tasks/config.yml index 001a82d475d..c0aa4c3ab31 100644 --- a/ansible/roles/test/tasks/config.yml +++ b/ansible/roles/test/tasks/config.yml @@ -101,12 +101,16 @@ become: yes when: add_tmp_portchannel_ip + - pause: seconds=5 + - name: Remove {{ portchannel_members }} from {{ tmp_portchannel }} shell: config portchannel member del {{ tmp_portchannel }} {{ item }} become: yes when: add_tmp_portchannel_members with_items: "{{portchannel_members}}" + - pause: seconds=5 + - name: Remove {{ tmp_portchannel }} shell: config portchannel del {{ tmp_portchannel }} become: yes From a9d3196f01438841b7b2b3f898f0cc8447c26ab9 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Thu, 15 Aug 2019 18:17:41 -0700 Subject: [PATCH 083/218] Some improvements for vxlan decap test (#1071) 1. Do reboot before the test 2. 
Send 10 packets instead of 1 packets --- .../roles/test/tasks/common_tasks/reboot_sonic.yml | 9 +++++++-- ansible/roles/test/tasks/vxlan-decap.yml | 12 ++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml index 5c844a68723..b241d4b59ac 100644 --- a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml +++ b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml @@ -7,6 +7,11 @@ reboot_type: "reboot" when: reboot_type is not defined +- name: set default value for sonic ready timeout + set_fact: + ready_timeout: 180 + when: ready_timeout is not defined + - fail: msg: "Reboot type {{ reboot_type }} is invalid. Must be one of {{ reboot_types }}" when: reboot_type not in reboot_types @@ -57,8 +62,8 @@ timeout: 180 changed_when: false -- name: wait for 2 minute for prcesses and interfaces to be stable - pause: seconds=120 +- name: wait for {{ ready_timeout }} seconds for prcesses and interfaces to be stable + pause: seconds={{ ready_timeout }} - name: Wait for warmboot-finalizer service to finish become: true diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index a5750e84382..00bddac3122 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -38,6 +38,11 @@ supervisorctl: state=restarted name=arp_responder delegate_to: "{{ ptf_host }}" + - name: Restart DUT. 
Wait 240 seconds after SONiC started ssh + include: reboot.yml + vars: + ready_timeout: 240 + - name: Render DUT parameters to json file for the test template: src=vxlan_decap.json.j2 dest=/tmp/vxlan_decap.json delegate_to: "{{ ptf_host }}" @@ -63,7 +68,7 @@ ptf_test_params: - vxlan_enabled=False - config_file='/tmp/vxlan_decap.json' - - repetitions=1 + - count=10 - name: Configure vxlan decap tunnel shell: sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db @@ -83,7 +88,7 @@ ptf_test_params: - vxlan_enabled=True - config_file='/tmp/vxlan_decap.json' - - count=1 + - count=10 - name: Remove vxlan tunnel map configuration for {{ item }} shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" @@ -103,8 +108,7 @@ ptf_test_params: - vxlan_enabled=False - config_file='/tmp/vxlan_decap.json' - - repetitions=1 - + - count=10 - always: - name: Remove vxlan tunnel map configuration for {{ item }} From 82835d2f176d2d9cb067550b170336268500c253 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 19 Aug 2019 11:20:04 -0700 Subject: [PATCH 084/218] [warm-reboot] Add lag flap check after warm boot (#1066) * Add lag flap check after warm boot Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 23 +++++++++++++++++-- ansible/roles/test/files/ptftests/arista.py | 20 ++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index b13cfaa4463..7a94bc4d4b6 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -777,8 +777,8 @@ def wait_for_ssh_threads(): if self.reboot_type == 'fast-reboot' and no_cp_replies < 0.95 * self.nr_vl_pkts: self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) - if self.reboot_type == 'warm-reboot' and 
self.preboot_oper is not None: - if self.pre_handle is not None: + if self.reboot_type == 'warm-reboot': + if self.preboot_oper is not None and self.pre_handle is not None: self.log("Postboot checks:") log_info, fails = self.pre_handle.verify(pre_check=False) self.populate_fail_info(fails) @@ -786,6 +786,10 @@ def wait_for_ssh_threads(): self.log(log) self.log(" ") + else: + # verify there are no interface flaps after warm boot + self.neigh_lag_status_check() + except Exception as e: self.fails['dut'].add(e) finally: @@ -864,6 +868,21 @@ def wait_for_ssh_threads(): self.assertTrue(is_good, errors) + def neigh_lag_status_check(self): + """ + Ensure there are no interface flaps after warm-boot + """ + for neigh in self.ssh_targets: + self.neigh_handle = Arista(neigh, None, self.test_params) + self.neigh_handle.connect() + fails, flap_cnt = self.neigh_handle.verify_neigh_lag_no_flap() + self.neigh_handle.disconnect() + self.fails[neigh] |= fails + if not flap_cnt: + self.log("No LAG flaps seen on %s after warm boot" % neigh) + else: + self.fails[neigh].add("LAG flapped %s times on %s after warm boot" % (flap_cnt, neigh)) + def extract_no_cpu_replies(self, arr): """ This function tries to extract number of replies from dataplane, when control plane is non working diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index fcd4b2ecf8f..04459417849 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -137,6 +137,9 @@ def run(self): sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] if not run_once: + # clear Portchannel counters + self.do_cmd("clear counters Port-Channel 1") + self.ipv4_gr_enabled, self.ipv6_gr_enabled, self.gr_timeout = self.parse_bgp_neighbor_once(bgp_neig_output) if self.gr_timeout is not None: log_first_line = "session_begins_%f" % cur_time @@ -423,6 +426,23 @@ def 
verify_neigh_lag_state(self, lag, state="connected", pre_check=True): self.fails.add('%s: Invalid interface name' % msg_prefix[pre_check]) return self.fails, lag_state + def verify_neigh_lag_no_flap(self): + flap_cnt = sys.maxint + output = self.do_cmd('show interfaces Po1 | json') + if 'Invalid' not in output: + data = '\n'.join(output.split('\r\n')[1:-1]) + obj = json.loads(data) + + if 'interfaces' in obj and 'Port-Channel1' in obj['interfaces']: + intf_cnt_info = obj['interfaces']['Port-Channel1']['interfaceCounters'] + flap_cnt = intf_cnt_info['linkStatusChanges'] + else: + self.fails.add('Object missing in output for Port-Channel1') + return self.fails, flap_cnt + + self.fails.add('Invalid interface name - Po1') + return self.fails, flap_cnt + def check_gr_peer_status(self, output): # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 if not self.ipv4_gr_enabled: From 5c297f0d603c23fc9800ac1cb5f170bfe2db8d74 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 19 Aug 2019 14:12:10 -0700 Subject: [PATCH 085/218] Add sensor data for Arista-7260CX3-Q64 (#1074) Signed-off-by: Neetha John --- ansible/group_vars/sonic/sku-sensors-data.yml | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index 1741fc1c626..029ed4fc7a8 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -1868,3 +1868,99 @@ sensors_checks: temp: [] psu_skips: {} + + Arista-7260CX3-Q64: + alarms: + fan: + - pmbus-i2c-3-58/fan1/fan1_alarm + - pmbus-i2c-4-58/fan1/fan1_alarm + - pmbus-i2c-3-58/fan1/fan1_fault + - pmbus-i2c-4-58/fan1/fan1_fault + - la_cpld-i2c-85-60/fan1/fan1_fault + - la_cpld-i2c-85-60/fan2/fan2_fault + - la_cpld-i2c-85-60/fan3/fan3_fault + - la_cpld-i2c-85-60/fan4/fan4_fault + power: + - pmbus-i2c-3-58/iin/curr1_max_alarm + - pmbus-i2c-3-58/iout1/curr2_max_alarm + - 
pmbus-i2c-3-58/iout1/curr2_crit_alarm + - pmbus-i2c-3-58/iout2/curr3_crit_alarm + - pmbus-i2c-3-58/vin/in1_alarm + - pmbus-i2c-3-58/vout1/in2_lcrit_alarm + - pmbus-i2c-3-58/vout1/in2_crit_alarm + - pmbus-i2c-4-58/iin/curr1_max_alarm + - pmbus-i2c-4-58/iout1/curr2_max_alarm + - pmbus-i2c-4-58/iout1/curr2_crit_alarm + - pmbus-i2c-4-58/iout2/curr3_crit_alarm + - pmbus-i2c-4-58/vin/in1_alarm + - pmbus-i2c-4-58/vout1/in2_lcrit_alarm + - pmbus-i2c-4-58/vout1/in2_crit_alarm + temp: + - coretemp-isa-0000/Physical id 0/temp1_crit_alarm + - coretemp-isa-0000/Core 0/temp2_crit_alarm + - coretemp-isa-0000/Core 1/temp3_crit_alarm + - lm73-i2c-88-48/Front panel temp sensor/temp1_min_alarm + - lm73-i2c-88-48/Front panel temp sensor/temp1_max_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_min_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_max_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_min_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_min_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_fault + - pmbus-i2c-3-58/Power supply 1 exhaust temp sensor/temp3_alarm + - pmbus-i2c-3-58/Power supply 1 inlet temp sensor/temp2_alarm + - pmbus-i2c-3-58/Power supply 1 hotspot sensor/temp1_alarm + - pmbus-i2c-4-58/Power supply 2 exhaust temp sensor/temp3_alarm + - pmbus-i2c-4-58/Power supply 2 inlet temp sensor/temp2_alarm + - pmbus-i2c-4-58/Power supply 2 hotspot sensor/temp1_alarm + + compares: + fan: [] + power: + - - pmbus-i2c-3-58/iin/curr1_input + - pmbus-i2c-3-58/iin/curr1_max + - - pmbus-i2c-3-58/iout1/curr2_input + - pmbus-i2c-3-58/iout1/curr2_max + - - pmbus-i2c-4-58/iin/curr1_input + - pmbus-i2c-4-58/iin/curr1_max + - - 
pmbus-i2c-4-58/iout1/curr2_input + - pmbus-i2c-4-58/iout1/curr2_max + temp: + - - coretemp-isa-0000/Physical id 0/temp1_input + - coretemp-isa-0000/Physical id 0/temp1_max + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_max + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_max + - - lm73-i2c-88-48/Front panel temp sensor/temp1_input + - lm73-i2c-88-48/Front panel temp sensor/temp1_max + - - max6658-i2c-1-4c/Asic temp sensor/temp1_input + - max6658-i2c-1-4c/Asic temp sensor/temp1_max + - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_input + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max + - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_input + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max + + non_zero: + fan: + - pmbus-i2c-3-58/fan1/fan1_input + - pmbus-i2c-4-58/fan1/fan1_input + - la_cpld-i2c-85-60/fan1/fan1_input + - la_cpld-i2c-85-60/fan2/fan2_input + - la_cpld-i2c-85-60/fan3/fan3_input + - la_cpld-i2c-85-60/fan4/fan4_input + power: + - pmbus-i2c-4-58/pin/power1_input + - pmbus-i2c-4-58/pout1/power2_input + - pmbus-i2c-4-58/pout2/power3_input + - pmbus-i2c-3-58/pin/power1_input + - pmbus-i2c-3-58/pout1/power2_input + - pmbus-i2c-3-58/pout2/power3_input + temp: + - pch_haswell-virtual-0/temp1/temp1_input + + psu_skips: {} From d6f2ebee2721e2040239e0cbee26a4895b06c987 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Mon, 19 Aug 2019 19:03:04 -0700 Subject: [PATCH 086/218] [vxlan-decap]: Improvements of the test (#1075) * Output offset of ports in data structures * Warmup DUT before the test * Increase wating time from 0.2 to 0.5 * Quote minigraph_vlans otherwise ansible could remove this variable --- .../roles/test/files/ptftests/vxlan-decap.py | 77 ++++++++++++++----- ansible/roles/test/tasks/vxlan-decap.yml | 10 +-- 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py 
index e20b0a15687..24058f63be7 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -157,8 +157,36 @@ def setUp(self): def tearDown(self): return - def runTest(self): - print + def warmup(self): + print "Warming up" + err = '' + trace = '' + ret = 0 + try: + for test in self.tests: + if self.vxlan_enabled: + self.Vxlan(test, True) + self.RegularLAGtoVLAN(test, True) + self.RegularVLANtoLAG(test, True) + + except Exception as e: + err = str(e) + trace = traceback.format_exc() + ret = -1 + if ret != 0: + print "The warmup failed" + print + print "Error: %s" % err + print + print trace + else: + print "Warmup successful\n" + sys.stdout.flush() + if ret != 0: + raise AssertionError("Warmup failed") + + def work_test(self): + print "Testing" err = '' trace = '' ret = 0 @@ -194,28 +222,37 @@ def runTest(self): if ret != 0: raise AssertionError(err) - def Vxlan(self, test): - for n in self.net_ports: - for a in test['acc_ports']: + + def runTest(self): + print + # Warm-up first + self.warmup() + # test itself + self.work_test() + + + def Vxlan(self, test, wu = False): + for i, n in enumerate(self.net_ports): + for j, a in enumerate(test['acc_ports']): res, out = self.checkVxlan(a, n, test) - if not res: - return False, out + if not res and not wu: + return False, out + " | net_port_rel=%d acc_port_rel=%d" % (i, j) return True, "" - def RegularLAGtoVLAN(self, test): - for n in self.net_ports: - for a in test['acc_ports']: + def RegularLAGtoVLAN(self, test, wu = False): + for i, n in enumerate(self.net_ports): + for j, a in enumerate(test['acc_ports']): res, out = self.checkRegularRegularLAGtoVLAN(a, n, test) - if not res: - return False, out + if not res and not wu: + return False, out + " | net_port_rel=%d acc_port_rel=%d" % (i, j) return True, "" - def RegularVLANtoLAG(self, test): - for dst, ports in self.pc_info: - for a in test['acc_ports']: + def RegularVLANtoLAG(self, test, wu = False): + for i, 
(dst, ports) in enumerate(self.pc_info): + for j, a in enumerate(test['acc_ports']): res, out = self.checkRegularRegularVLANtoLAG(a, ports, dst, test) - if not res: - return False, out + if not res and not wu: + return False, out + " | pc_info_rel=%d acc_port_rel=%d" % (i, j) return True, "" def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): @@ -242,7 +279,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: @@ -274,7 +311,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: @@ -311,7 +348,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index 00bddac3122..eeb08f0da4f 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -73,7 +73,7 @@ - name: Configure vxlan decap tunnel shell: sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db - - name: Configure vxlan decap tunnel map for {{ item }} + - name: Configure vxlan decap tunnel maps shell: sonic-cfggen -j /tmp/vxlan_db.maps.{{ item }}.json 
--write-to-db with_items: minigraph_vlans @@ -90,9 +90,9 @@ - config_file='/tmp/vxlan_decap.json' - count=10 - - name: Remove vxlan tunnel map configuration for {{ item }} + - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: minigraph_vlans + with_items: "{{ minigraph_vlans }}" - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" @@ -111,9 +111,9 @@ - count=10 - always: - - name: Remove vxlan tunnel map configuration for {{ item }} + - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: minigraph_vlans + with_items: "{{ minigraph_vlans }}" - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" From bb17dcf91f7b3911d7b23d7f3864d463c8dd1335 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Thu, 8 Aug 2019 23:48:09 +0300 Subject: [PATCH 087/218] [everflow/policer] remove route after session is removed (#1060) On Mellanox SPC there are 3 maximum mirror sessions or 2 mirror sessions if session attribute update is performed to avoid no resource error remove first a session and then route, otherwise session will be updated with changed dest mac of default route next hop Signed-off-by: Stepan Blyschak --- .../roles/test/tasks/everflow_testbed/testcase_8.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml index de87836dc3b..57876c6651b 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml @@ -47,11 +47,6 @@ ptf_extra_options: "--relax --debug info" always: - 
- name: Remove route - shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" - ignore_errors: yes - become: yes - - name: Remove the rule with DSCP value and mask shell: | redis-cli -n 4 del "ACL_RULE|{{dscp_table_name}}|RULE_1" @@ -75,3 +70,8 @@ ignore_errors: yes become: yes + - name: Remove route + shell: vtysh -e "conf t" -e "no ip route {{ session_prefix_1 }} {{ neighbor_info_1['addr'] }}" + ignore_errors: yes + become: yes + From f9c9335417ff02753811f7b2617d14c23039ff9f Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Tue, 20 Aug 2019 17:21:12 -0700 Subject: [PATCH 088/218] [vxlan-decap]: Generate mapping between vlan member ports and vlan ip address robustly (#1078) * Use unquoted style of the variable usage * Make the fact cache valid for a day * Rewrite mapping between vlan ports and vlan ip addresses --- ansible/ansible.cfg | 2 +- .../roles/test/files/ptftests/vxlan-decap.py | 58 +++++++++++++------ ansible/roles/test/tasks/vxlan-decap.yml | 15 +---- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 990331eb5b9..37ac0f14b5d 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -157,7 +157,7 @@ callback_whitelist = profile_tasks # current IP information. 
fact_caching = jsonfile fact_caching_connection = ~/.ansible/cache -fact_caching_timeout = 1200 +fact_caching_timeout = 86400 # retry files diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index 24058f63be7..4a86e274322 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -25,6 +25,8 @@ import datetime import subprocess import traceback +import socket +import struct from pprint import pprint from pprint import pformat @@ -57,14 +59,40 @@ def readMacs(self): def generate_ArpResponderConfig(self): config = {} for test in self.tests: - for port in test['acc_ports']: - config['eth%d' % port] = [test['vlan_ip_prefix'] % port] + for port, ip in test['vlan_ip_prefixes'].items(): + config['eth%d' % port] = [ip] with open('/tmp/vxlan_arpresponder.conf', 'w') as fp: json.dump(config, fp) return + def generate_VlanPrefixes(self, gw, prefixlen, acc_ports): + res = {} + n_hosts = 2**(32 - prefixlen) - 3 + nr_of_dataplane_ports = len(self.dataplane.ports) + + if nr_of_dataplane_ports > n_hosts: + raise Exception("The prefix len size is too small for the test") + + gw_addr_n = struct.unpack(">I", socket.inet_aton(gw))[0] + mask = (2**32 - 1) ^ (2**(32 - prefixlen) - 1) + net_addr_n = gw_addr_n & mask + + addr = 1 + for port in acc_ports: + while True: + host_addr_n = net_addr_n + addr + host_ip = socket.inet_ntoa(struct.pack(">I", host_addr_n)) + if host_ip != gw: + break + else: + addr += 1 # skip gw + res[port] = host_ip + addr += 1 + + return res + def setUp(self): self.dataplane = ptf.dataplane_instance @@ -117,20 +145,13 @@ def setUp(self): for d in graph['minigraph_vlan_interfaces']: if d['attachto'] == name: gw = d['addr'] - prefixlen = d['prefixlen'] + prefixlen = int(d['prefixlen']) break else: raise Exception("Vlan '%s' is not found" % name) test['vlan_gw'] = gw - - number_of_dataplane_ports = len(self.dataplane.ports) - if 
number_of_dataplane_ports > 256: - raise Exception("Too much dataplane ports for the test") - if prefixlen > 24: - raise Exception("The prefix len size is too small for the test") - - test['vlan_ip_prefix'] = '.'.join(gw.split('.')[0:3])+".%d" + test['vlan_ip_prefixes'] = self.generate_VlanPrefixes(gw, prefixlen, test['acc_ports']) self.tests.append(test) @@ -150,11 +171,14 @@ def setUp(self): self.generate_ArpResponderConfig() + self.cmd(["supervisorctl", "restart", "arp_responder"]) + self.dataplane.flush() return def tearDown(self): + self.cmd(["supervisorctl", "stop", "arp_responder"]) return def warmup(self): @@ -258,7 +282,7 @@ def RegularVLANtoLAG(self, test, wu = False): def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): src_mac = self.ptf_mac_addrs['eth%d' % acc_port] dst_mac = self.dut_mac - src_ip = test['vlan_ip_prefix'] % acc_port + src_ip = test['vlan_ip_prefixes'][acc_port] packet = simple_tcp_packet( eth_dst=dst_mac, @@ -279,7 +303,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: @@ -292,7 +316,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): src_mac = self.random_mac dst_mac = self.dut_mac src_ip = test['src_ip'] - dst_ip = test['vlan_ip_prefix'] % acc_port + dst_ip = test['vlan_ip_prefixes'][acc_port] packet = simple_tcp_packet( eth_dst=dst_mac, @@ -311,7 +335,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) rv = 
nr_rcvd == self.nr out = "" if not rv: @@ -323,7 +347,7 @@ def checkVxlan(self, acc_port, net_port, test): inner_dst_mac = self.ptf_mac_addrs['eth%d' % acc_port] inner_src_mac = self.dut_mac inner_src_ip = test['vlan_gw'] - inner_dst_ip = test['vlan_ip_prefix'] % acc_port + inner_dst_ip = test['vlan_ip_prefixes'][acc_port] dst_mac = self.dut_mac src_mac = self.random_mac ip_dst = self.loopback_ip @@ -348,7 +372,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index eeb08f0da4f..4a002308858 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -34,10 +34,6 @@ shell: supervisorctl update delegate_to: "{{ ptf_host }}" - - name: Start arpresponder - supervisorctl: state=restarted name=arp_responder - delegate_to: "{{ ptf_host }}" - - name: Restart DUT. 
Wait 240 seconds after SONiC started ssh include: reboot.yml vars: @@ -54,9 +50,6 @@ template: src=vxlan_db.maps.json.j2 dest=/tmp/vxlan_db.maps.{{ item }}.json with_items: minigraph_vlans - - name: Wait for some time until arp cache is ready - pause: seconds=50 - - include: ptf_runner.yml vars: ptf_test_name: Vxlan decap test - No vxlan configuration @@ -92,7 +85,7 @@ - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: "{{ minigraph_vlans }}" + with_items: minigraph_vlans - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" @@ -113,11 +106,7 @@ - always: - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: "{{ minigraph_vlans }}" + with_items: minigraph_vlans - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" - - - name: Stop arpresponder - supervisorctl: state=stopped name=arp_responder - delegate_to: "{{ ptf_host }}" From c5527099d5f242c21698a5a45ec2cc3c56a0bc64 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Tue, 20 Aug 2019 17:25:20 -0700 Subject: [PATCH 089/218] [vm-set]: Add setting of RCVBUF default parameter (#1076) * Add setting of RCVBUF default parameter * Make default smaller - 31Mb --- ansible/roles/vm_set/tasks/main.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 22b502b8881..3a66c6d0729 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ b/ansible/roles/vm_set/tasks/main.yml @@ -96,13 +96,20 @@ - net.bridge.bridge-nf-call-ip6tables - net.bridge.bridge-nf-call-iptables -- name: Set sysctl RCVBUF parameter for testbed +- name: Set sysctl RCVBUF max parameter for testbed sysctl: name: 
"net.core.rmem_max" value: 509430500 sysctl_set: yes become: yes +- name: Set sysctl RCVBUF default parameter for testbed + sysctl: + name: "net.core.rmem_default" + value: 31457280 + sysctl_set: yes + become: yes + - name: Setup external front port include: external_port.yml when: external_port is defined From a06596725875850c8df7a0fa77800af2345f5296 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Wed, 5 Jun 2019 15:58:24 -0700 Subject: [PATCH 090/218] [minigraph]: Add ERSPANV6 ACL slot (#935) Create MIRRORV6 ACL table by default Signed-off-by: Shu0T1an ChenG --- ansible/templates/minigraph_dpg.j2 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/templates/minigraph_dpg.j2 b/ansible/templates/minigraph_dpg.j2 index 266019087af..83fb41893b3 100644 --- a/ansible/templates/minigraph_dpg.j2 +++ b/ansible/templates/minigraph_dpg.j2 @@ -112,7 +112,12 @@ Everflow Everflow - + + ERSPANV6 + EverflowV6 + EverflowV6 + + VTY_LINE ssh-only SSH From 033aa4525b11b566728d842b3a333e4afa0321e8 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Wed, 21 Aug 2019 10:32:52 -0700 Subject: [PATCH 091/218] [everflow]: Add Mellanox support due to specific GRE packet format (#1077) Mallnox uses a different GRE protocol 0x8949 and crafts extra information in the packet that we need to take care of specifically. 
Signed-off-by: Shu0T1an ChenG --- .../files/acstests/everflow_policer_test.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/acstests/everflow_policer_test.py b/ansible/roles/test/files/acstests/everflow_policer_test.py index b09e30a74e5..31249a244db 100644 --- a/ansible/roles/test/files/acstests/everflow_policer_test.py +++ b/ansible/roles/test/files/acstests/everflow_policer_test.py @@ -100,7 +100,16 @@ def checkMirroredFlow(self): """ @summary: Send traffic & check how many mirrored packets are received @return: count: number of mirrored packets received + + Note: + Mellanox crafts the GRE packets with extra information: + That is: 22 bytes extra information after the GRE header """ + payload = self.base_pkt + if self.asic_type in ["mellanox"]: + import binascii + payload = binascii.unhexlify("0"*44) + str(payload) # Add the padding + exp_pkt = testutils.simple_gre_packet( eth_src = self.router_mac, ip_src = self.session_src_ip, @@ -109,15 +118,21 @@ def checkMirroredFlow(self): ip_id = 0, #ip_flags = 0x10, # need to upgrade ptf version to support it ip_ttl = self.session_ttl, - inner_frame = self.base_pkt) + inner_frame = payload) - exp_pkt['GRE'].proto = 0x88be + if self.asic_type in ["mellanox"]: + exp_pkt['GRE'].proto = 0x8949 # Mellanox specific + else: + exp_pkt['GRE'].proto = 0x88be masked_exp_pkt = Mask(exp_pkt) masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "flags") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + if self.asic_type in ["mellanox"]: + masked_exp_pkt.set_do_not_care(304, 176) # Mask the Mellanox specific inner header + self.dataplane.flush() count = 0 From 68f7714405ce78cbce857670ed7eed1aebb085d1 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Wed, 28 Aug 2019 15:22:41 -0700 Subject: [PATCH 092/218] [warm-reboot] Preboot sad path automation for n lag members (#1036) * Preboot sad path automation for n lag 
members Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 38 ++++-- ansible/roles/test/files/ptftests/arista.py | 15 ++- ansible/roles/test/files/ptftests/sad_path.py | 119 +++++++++++++----- .../advanced_reboot/validate_preboot_list.yml | 2 +- .../test/tasks/warm-reboot-multi-sad.yml | 13 +- 5 files changed, 142 insertions(+), 45 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 7a94bc4d4b6..9a015aa09ce 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -319,7 +319,7 @@ def get_portchannel_info(self): for member in content[key]['members']: for vm_key in self.vm_dut_map.keys(): if member in self.vm_dut_map[vm_key]['dut_ports']: - self.vm_dut_map[vm_key]['dut_portchannel'] = key + self.vm_dut_map[vm_key]['dut_portchannel'] = str(key) self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel1' break @@ -327,8 +327,8 @@ def get_neigh_port_info(self): content = self.read_json('neigh_port_info') for key in content.keys(): if content[key]['name'] in self.vm_dut_map.keys(): - self.vm_dut_map[content[key]['name']]['dut_ports'].append(key) - self.vm_dut_map[content[key]['name']]['neigh_ports'].append(content[key]['port']) + self.vm_dut_map[content[key]['name']]['dut_ports'].append(str(key)) + self.vm_dut_map[content[key]['name']]['neigh_ports'].append(str(content[key]['port'])) self.vm_dut_map[content[key]['name']]['ptf_ports'].append(self.port_indices[key]) def build_peer_mapping(self): @@ -355,6 +355,30 @@ def populate_fail_info(self, fails): self.fails[key] = set() self.fails[key] |= fails[key] + def get_preboot_info(self): + ''' + Prepares the msg string to log when a preboot_oper is defined. + preboot_oper can be represented in the following ways + eg. 
'preboot_oper' - a single VM will be selected and preboot_oper will be applied to it + 'neigh_bgp_down:2' - 2 VMs will be selected and preboot_oper will be applied to the selected 2 VMs + 'neigh_lag_member_down:3:1' - this case is used for lag member down operation only. This indicates that + 3 VMs will be selected and 1 of the lag members in the porchannel will be brought down + ''' + msg = '' + if self.preboot_oper: + msg = 'Preboot oper: %s ' % self.preboot_oper + if ':' in self.preboot_oper: + oper_list = self.preboot_oper.split(':') + msg = 'Preboot oper: %s ' % oper_list[0] # extract the preboot oper_type + if len(oper_list) > 2: + # extract the number of VMs and the number of LAG members. preboot_oper will be of the form oper:no of VMS:no of lag members + msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1]) + else: + # extract the number of VMs. preboot_oper will be of the form oper:no of VMS + msg += 'Number of sad path VMs: %s' % oper_list[-1] + + return msg + def setUp(self): self.fails['dut'] = set() self.port_indices = self.read_port_indices() @@ -427,13 +451,7 @@ def setUp(self): self.generate_arp_ping_packet() if self.reboot_type == 'warm-reboot': - # get the number of members down for sad path - if self.preboot_oper: - if ':' in self.preboot_oper: - oper_type, cnt = self.preboot_oper.split(':') - else: - oper_type, cnt = self.preboot_oper, 1 - self.log("Preboot Oper: %s Number down: %s" % (oper_type, cnt)) + self.log(self.get_preboot_info()) # Pre-generate list of packets to be sent in send_in_background method. 
generate_start = datetime.datetime.now() diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index 04459417849..db967eb0bda 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -396,18 +396,23 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"): self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver) return self.fails, bgp_state - def change_neigh_lag_state(self, lag, is_up=True): + def change_neigh_lag_state(self, intf, is_up=True): state = ['shut', 'no shut'] self.do_cmd('configure') - is_match = re.match('(Port-Channel|Ethernet)\d+', lag) + is_match = re.match('(Port-Channel|Ethernet)\d+', intf) if is_match: - output = self.do_cmd('interface %s' % lag) + output = self.do_cmd('interface %s' % intf) if 'Invalid' not in output: self.do_cmd(state[is_up]) self.do_cmd('exit') - self.do_cmd('exit') + self.do_cmd('exit') + + def change_neigh_intfs_state(self, intfs, is_up=True): + for intf in intfs: + self.change_neigh_lag_state(intf, is_up=is_up) def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): + states = state.split(',') lag_state = False msg_prefix = ['Postboot', 'Preboot'] is_match = re.match('(Port-Channel|Ethernet)\d+', lag) @@ -418,7 +423,7 @@ def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): obj = json.loads(data) if 'interfaces' in obj and lag in obj['interfaces']: - lag_state = (obj['interfaces'][lag]['interfaceStatus'] == state) + lag_state = (obj['interfaces'][lag]['interfaceStatus'] in states) else: self.fails.add('%s: Verify LAG %s: Object missing in output' % (msg_prefix[pre_check], lag)) return self.fails, lag_state diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index 958e4be2e58..bf722d917f5 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -36,8 
+36,9 @@ def revert(self): class SadPath(object): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args): - (self.oper_type, self.cnt) = oper_type.split(':') if ':' in oper_type else (oper_type, 1) - self.cnt = int(self.cnt) + self.oper_type = '' + self.cnt = 1 + self.memb_cnt = 0 self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map @@ -50,6 +51,21 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) self.log = [] self.fails = dict() self.fails['dut'] = set() + self.tot_memb_cnt = 0 + self.memb_index = 0 + self.extract_oper_info(oper_type) + + def extract_oper_info(self, oper_type): + if oper_type and ':' in oper_type: + temp = oper_type.split(':') + self.oper_type = temp[0] + # get number of VMs where the sad pass oper needs to be done + self.cnt = int(temp[1]) + if len(temp) > 2: + # get the number of lag members in a portchannel that should be brought down + self.memb_cnt = int(temp[-1]) + else: + self.oper_type = oper_type def cmd(self, cmds): process = subprocess.Popen(cmds, @@ -74,7 +90,7 @@ def select_vm(self): else: self.neigh_vms.extend(self.vm_list[vm_index:]) self.neigh_vms.extend(self.vm_list[0:exceed_len]) - self.vm_list = self.vm_list[exceed_len:vm_len - self.cnt] + self.vm_list = self.vm_list[exceed_len:exceed_len + vm_len - self.cnt] def get_neigh_name(self): for key in self.vm_dut_map: @@ -101,11 +117,25 @@ def vm_disconnect(self): for vm in self.vm_handles: self.vm_handles[vm].disconnect() + def select_member(self): + # select index of lag member to put down + if self.tot_memb_cnt != 0: + self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt + def setup(self): self.select_vm() self.get_neigh_name() self.down_neigh_port() self.vm_connect() + + # decide if its all member down or few members down for lag member oper type + if 'member' in self.oper_type: + self.tot_memb_cnt = len(self.vm_dut_map[self.neigh_names.values()[0]]['dut_ports']) + 
if self.memb_cnt == 0: + self.memb_cnt = self.tot_memb_cnt + if self.tot_memb_cnt != self.memb_cnt: + self.select_member() + for vm in self.vm_handles: self.neigh_bgps[vm], self.dut_bgps[vm] = self.vm_handles[vm].get_bgp_info() self.fails[vm] = set() @@ -128,9 +158,11 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, self.dut_ssh = dut_ssh self.dut_needed = dict() self.lag_members_down = dict() + self.neigh_lag_members_down = dict() self.neigh_lag_state = None self.po_neigh_map = dict() self.msg_prefix = ['Postboot', 'Preboot'] + self.memb_str = 'member' if 'member' in self.oper_type else '' def populate_bgp_state(self): [self.dut_needed.setdefault(vm, self.dut_bgps[vm]) for vm in self.neigh_vms] @@ -141,11 +173,11 @@ def populate_bgp_state(self): elif self.oper_type == 'dut_bgp_down': self.neigh_bgps['changed_state'] = 'Active' self.dut_bgps['changed_state'] = 'Idle' - elif self.oper_type == 'neigh_lag_down': + elif 'neigh_lag' in self.oper_type: # on the DUT side, bgp states are different pre and post boot. 
hence passing multiple values self.neigh_bgps['changed_state'] = 'Idle' self.dut_bgps['changed_state'] = 'Connect,Active,Idle' - elif self.oper_type == 'dut_lag_down': + elif 'dut_lag' in self.oper_type: self.neigh_bgps['changed_state'] = 'Idle' self.dut_bgps['changed_state'] = 'Active,Connect,Idle' @@ -169,13 +201,22 @@ def sad_setup(self, is_up=True): time.sleep(30) elif 'lag' in self.oper_type: - self.log.append('LAG state change will be for %s' % ", ".join(self.neigh_vms)) - if self.oper_type == 'neigh_lag_down': + self.log.append('LAG %s state change will be for %s' % (self.memb_str, ", ".join(self.neigh_vms))) + if 'neigh_lag' in self.oper_type: for vm in self.neigh_vms: - self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']) - self.vm_handles[vm].change_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], is_up=is_up) - elif self.oper_type == 'dut_lag_down': + + # populate entity to be brought down on neigh end (portchannel/portchannel members) + if 'member' in self.oper_type: + down_intfs = self.neigh_lag_members_down[self.neigh_names[vm]] + else: + down_intfs = [self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']] + + self.log.append('Changing state of LAG %s %s to shut' % (self.memb_str, ", ".join(down_intfs))) + self.vm_handles[vm].change_neigh_intfs_state(down_intfs, is_up=is_up) + + elif 'dut_lag' in self.oper_type: self.change_dut_lag_state(is_up=is_up) + # wait for sometime for lag members state to sync time.sleep(120) @@ -234,30 +275,47 @@ def sad_bgp_verify(self): else: self.fails['dut'].add('BGP state not down on DUT') + def populate_lag_member_down(self, neigh_name): + po_name = self.vm_dut_map[neigh_name]['dut_portchannel'] + # build DUT portchannel to down members mapping and neigh name to down members mapping + # if only single member is down, extract the member and convert it into list otherwise assign the list directly + if self.tot_memb_cnt != 
self.memb_cnt: + self.lag_members_down[po_name] = [self.vm_dut_map[neigh_name]['dut_ports'][self.memb_index]] + self.neigh_lag_members_down[neigh_name] = [self.vm_dut_map[neigh_name]['neigh_ports'][self.memb_index]] + else: + self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports'] + self.neigh_lag_members_down[neigh_name] = self.vm_dut_map[neigh_name]['neigh_ports'] + def populate_lag_state(self): - if self.oper_type == 'neigh_lag_down': - self.neigh_lag_state = 'disabled' - elif self.oper_type == 'dut_lag_down': + if 'neigh_lag' in self.oper_type: + self.neigh_lag_state = 'disabled,notconnect' + elif 'dut_lag' in self.oper_type: self.neigh_lag_state = 'notconnect' for neigh_name in self.neigh_names.values(): - # build portchannel to down members mapping - po_name = self.vm_dut_map[neigh_name]['dut_portchannel'] - self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports'] + self.populate_lag_member_down(neigh_name) def change_dut_lag_state(self, is_up=True): state = ['shutdown', 'startup'] for neigh_name in self.neigh_names.values(): dut_portchannel = self.vm_dut_map[neigh_name]['dut_portchannel'] - if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): continue - self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)]) - if return_code != 0: - self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) - self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) - self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + + # populate the entity that needs to be brought down (portchannel or portchannel member) + if 'member' in self.oper_type: + down_intfs = self.lag_members_down[dut_portchannel] else: - 
self.log.append('%s: State change successful on DUT for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) + down_intfs = [dut_portchannel] + + for intf in down_intfs: + if not re.match('(PortChannel|Ethernet)\d+', intf): continue + self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)]) + if return_code != 0: + self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + else: + self.log.append('State change successful on DUT for %s' % intf) def verify_dut_lag_member_state(self, match, pre_check=True): success = True @@ -265,10 +323,15 @@ def verify_dut_lag_member_state(self, match, pre_check=True): lag_memb_output = match.group(2) neigh_name = self.po_neigh_map[po_name] for member in self.vm_dut_map[neigh_name]['dut_ports']: - if po_name in self.lag_members_down and member in self.lag_members_down[po_name]: - search_str = '%s(D)' % member - else: - search_str = '%s(S)' % member + # default state for the lag member + search_str = '%s(S)' % member + + if po_name in self.lag_members_down: + if member in self.lag_members_down[po_name]: + search_str = '%s(D)' % member + # single member case. 
state of non down member of the down portchannel + elif self.tot_memb_cnt != self.memb_cnt: + search_str = '%s(S*)' % member if lag_memb_output.find(search_str) != -1: self.log.append('Lag member %s state as expected' % member) diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml index bf6f88f113d..5262b0b3172 100644 --- a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml +++ b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml @@ -1,5 +1,5 @@ - set_fact: - item_cnt: "{{ item.split(':')[1]|int }}" + item_cnt: "{{ item.split(':')[-1]|int }}" host_max_len: "{{ vm_hosts|length - 1 }}" member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml index 9555da8ca35..292a5684a94 100644 --- a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml +++ b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml @@ -3,9 +3,20 @@ reboot_limit: 1 when: reboot_limit is not defined +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. 
for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1'] + lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" + +- name: Add all lag member down case + set_fact: + pre_list: "{{ pre_list + ['dut_lag_member_down:2:{{ lag_memb_cnt }}', 'neigh_lag_member_down:3:{{ lag_memb_cnt }}']}}" + when: testbed_type in ['t0-64', 't0-116', 't0-64-32'] + - name: Warm-reboot test include: advanced-reboot.yml vars: reboot_type: warm-reboot - preboot_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3'] + preboot_list: "{{ pre_list }}" preboot_files: "peer_dev_info,neigh_port_info" From 9df3e359ccd3f3978f08864540ef09bd2e599ab9 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 5 Sep 2019 07:15:39 +0800 Subject: [PATCH 093/218] Backport pytest infra and scripts to 201811 branch (#1080) * Backport pytest infra and scripts to 201811 branch * Update the platform testing scripts for 201811 branch Signed-off-by: Xin Wang --- ansible/library/conn_graph_facts.py | 36 +-- tests/ansible_fixtures.py | 31 ++ tests/ansible_host.py | 43 +++ tests/bgp_speaker/announce_routes.py | 24 ++ tests/bgp_speaker/bgp_speaker_route.j2 | 3 + tests/bgp_speaker/config.j2 | 24 ++ tests/bgp_speaker/dump.py | 16 ++ tests/bgp_speaker/http_api.py | 16 ++ tests/bgp_speaker/routes.j2 | 3 + tests/bgp_speaker/start.j2 | 20 ++ tests/common/__init__.py | 0 tests/common/devices.py | 195 +++++++++++++ tests/common/errors.py | 8 + tests/common/mellanox_data.py | 142 +++++++++ tests/common/utilities.py | 52 ++++ tests/conftest.py | 123 ++++++++ tests/eos | 1 + tests/fdb/conftest.py | 0 tests/fdb/test_fdb.py | 159 +++++++++++ tests/platform/check_critical_services.py | 27 ++ tests/platform/check_interface_status.py | 57 ++++ tests/platform/check_transceiver_status.py | 119 ++++++++ 
tests/platform/mellanox/check_sysfs.py | 117 ++++++++ .../mellanox/mellanox_psu_controller.py | 269 ++++++++++++++++++ tests/platform/mellanox/test_check_sysfs.py | 25 ++ tests/platform/platform_fixtures.py | 11 + tests/platform/psu_controller.py | 113 ++++++++ tests/platform/test_platform_info.py | 177 ++++++++++++ tests/platform/test_reboot.py | 115 ++++++++ tests/platform/test_reload_config.py | 55 ++++ tests/platform/test_sequential_restart.py | 73 +++++ tests/platform/test_sfp.py | 166 +++++++++++ tests/platform/test_xcvr_info_in_db.py | 22 ++ tests/ptf_fixtures.py | 67 +++++ tests/ptf_runner.py | 22 ++ tests/ptfadapter/README.md | 47 +++ tests/ptfadapter/__init__.py | 3 + tests/ptfadapter/ptfadapter.py | 91 ++++++ .../templates/ptf_nn_agent.conf.ptf.j2 | 11 + tests/ptftests | 1 + tests/scripts/arp_responder.conf.j2 | 10 + tests/scripts/arp_responder.py | 171 +++++++++++ tests/scripts/change_mac.sh | 13 + tests/scripts/remove_ip.sh | 7 + tests/setup.cfg | 2 + tests/test_bgp_fact.py | 22 ++ tests/test_bgp_speaker.py | 156 ++++++++++ tests/test_lldp.py | 62 ++++ tests/veos.vtb | 35 +++ tests/vtestbed.csv | 3 + 50 files changed, 2949 insertions(+), 16 deletions(-) create mode 100644 tests/ansible_fixtures.py create mode 100644 tests/ansible_host.py create mode 100644 tests/bgp_speaker/announce_routes.py create mode 100644 tests/bgp_speaker/bgp_speaker_route.j2 create mode 100644 tests/bgp_speaker/config.j2 create mode 100644 tests/bgp_speaker/dump.py create mode 100644 tests/bgp_speaker/http_api.py create mode 100644 tests/bgp_speaker/routes.j2 create mode 100644 tests/bgp_speaker/start.j2 create mode 100644 tests/common/__init__.py create mode 100644 tests/common/devices.py create mode 100644 tests/common/errors.py create mode 100644 tests/common/mellanox_data.py create mode 100644 tests/common/utilities.py create mode 100644 tests/conftest.py create mode 100644 tests/eos create mode 100644 tests/fdb/conftest.py create mode 100644 tests/fdb/test_fdb.py create 
mode 100644 tests/platform/check_critical_services.py create mode 100644 tests/platform/check_interface_status.py create mode 100644 tests/platform/check_transceiver_status.py create mode 100644 tests/platform/mellanox/check_sysfs.py create mode 100644 tests/platform/mellanox/mellanox_psu_controller.py create mode 100644 tests/platform/mellanox/test_check_sysfs.py create mode 100644 tests/platform/platform_fixtures.py create mode 100644 tests/platform/psu_controller.py create mode 100644 tests/platform/test_platform_info.py create mode 100644 tests/platform/test_reboot.py create mode 100644 tests/platform/test_reload_config.py create mode 100644 tests/platform/test_sequential_restart.py create mode 100644 tests/platform/test_sfp.py create mode 100644 tests/platform/test_xcvr_info_in_db.py create mode 100644 tests/ptf_fixtures.py create mode 100644 tests/ptf_runner.py create mode 100644 tests/ptfadapter/README.md create mode 100644 tests/ptfadapter/__init__.py create mode 100644 tests/ptfadapter/ptfadapter.py create mode 100644 tests/ptfadapter/templates/ptf_nn_agent.conf.ptf.j2 create mode 100644 tests/ptftests create mode 100644 tests/scripts/arp_responder.conf.j2 create mode 100644 tests/scripts/arp_responder.py create mode 100644 tests/scripts/change_mac.sh create mode 100644 tests/scripts/remove_ip.sh create mode 100644 tests/setup.cfg create mode 100644 tests/test_bgp_fact.py create mode 100644 tests/test_bgp_speaker.py create mode 100644 tests/test_lldp.py create mode 100644 tests/veos.vtb create mode 100644 tests/vtestbed.csv diff --git a/ansible/library/conn_graph_facts.py b/ansible/library/conn_graph_facts.py index e1894954dd4..0db38699f96 100644 --- a/ansible/library/conn_graph_facts.py +++ b/ansible/library/conn_graph_facts.py @@ -12,7 +12,7 @@ DOCUMENTATION=''' module: conn_graph_facts.py version_added: 2.0 -short_description: Retrive lab fanout switches physical and vlan connections +short_description: Retrive lab fanout switches physical and vlan 
connections Description: Retrive lab fanout switches physical and vlan connections add to Ansible facts @@ -23,7 +23,7 @@ Ansible_facts: device_info: The device(host) type and hwsku device_conn: each physical connection of the device(host) - device_vlan_range: all configured vlan range for the device(host) + device_vlan_range: all configured vlan range for the device(host) device_port_vlans: detailed vlanids for each physical port and switchport mode server_links: each server port vlan ids @@ -34,35 +34,35 @@ return: "device_info": { - "ManagementIp": "10.251.0.76/24", - "HwSku": "Arista-7260QX-64", + "ManagementIp": "10.251.0.76/24", + "HwSku": "Arista-7260QX-64", "Type": "FanoutLeaf" }, "device_conn": [ { - "StartPort": "Ethernet0", - "EndPort": "Ethernet33", - "StartDevice": "str-s6000-on-1", - "VlanID": "233", - "BandWidth": "40000", - "VlanMode": "Access", + "StartPort": "Ethernet0", + "EndPort": "Ethernet33", + "StartDevice": "str-s6000-on-1", + "VlanID": "233", + "BandWidth": "40000", + "VlanMode": "Access", "EndDevice": "str-7260-01" }, {...} ], "device_vlan_range": { "VlanRange": "201-980,1041-1100" - }, + }, "device_vlan_port:=: { ... "Ethernet44": { - "vlanids": "801-860", + "vlanids": "801-860", "mode": "Trunk" - }, + }, "Ethernet42": { - "vlanids": "861-920", + "vlanids": "861-920", "mode": "Trunk" - },...... + },...... 
} @@ -240,7 +240,11 @@ def main(): m_args = module.params hostname = m_args['host'] try: - lab_graph = Parse_Lab_Graph(LAB_GRAPHFILE_PATH+LAB_CONNECTION_GRAPH_FILE) + if m_args['filename']: + filename = m_args['filename'] + else: + filename = LAB_GRAPHFILE_PATH + LAB_CONNECTION_GRAPH_FILE + lab_graph = Parse_Lab_Graph(filename) lab_graph.parse_graph() dev = lab_graph.get_host_device_info(hostname) if dev is None: diff --git a/tests/ansible_fixtures.py b/tests/ansible_fixtures.py new file mode 100644 index 00000000000..ac31dd27012 --- /dev/null +++ b/tests/ansible_fixtures.py @@ -0,0 +1,31 @@ +""" This module provides few pytest-ansible fixtures overridden """ + +import pytest + +# Here we override ansible_adhoc fixture from pytest-ansible plugin to overcome +# scope limitation issue; since we want to be able to use ansible_adhoc in module/class scope +# fixtures we have to override the scope here in global conftest.py +# Let's have it with module scope for now, so if something really breaks next test module run will have +# this fixture reevaluated +@pytest.fixture(scope='module') +def ansible_adhoc(request): + """Return an inventory initialization method.""" + plugin = request.config.pluginmanager.getplugin("ansible") + + def init_host_mgr(**kwargs): + return plugin.initialize(request.config, request, **kwargs) + return init_host_mgr + + +# Same as for ansible_adhoc, let's have localhost fixture with session scope +# as it feels that during session run the localhost object should persist unchanged. +# Also, we have autouse=True here to force pytest to evaluate localhost fixture to overcome +# some hidden dependency between localhost and ansible_adhoc (even with default scope) (FIXME) +@pytest.fixture(scope='session', autouse=True) +def localhost(request): + """Return a host manager representing localhost.""" + # NOTE: Do not use ansible_adhoc as a dependent fixture since that will assert specific command-line parameters have + # been supplied. 
In the case of localhost, the parameters are provided as kwargs below. + plugin = request.config.pluginmanager.getplugin("ansible") + return plugin.initialize(request.config, request, inventory='localhost,', connection='local', + host_pattern='localhost').localhost diff --git a/tests/ansible_host.py b/tests/ansible_host.py new file mode 100644 index 00000000000..ff1f8ad79af --- /dev/null +++ b/tests/ansible_host.py @@ -0,0 +1,43 @@ +from ansible.plugins import callback_loader +from ansible.errors import AnsibleError + +def dump_ansible_results(results, stdout_callback='yaml'): + cb = callback_loader.get(stdout_callback) + return cb._dump_results(results) if cb else results + +class AnsibleModuleException(AnsibleError): + + """Sub-class AnsibleError when module exceptions occur.""" + + def __init__(self, msg, results=None): + super(AnsibleModuleException, self).__init__(msg) + self.results = results + + def __str__(self): + return "{}\nAnsible Results => {}".format(self.message, dump_ansible_results(self.results)) + +class AnsibleHost(object): + """ wrapper for ansible host object """ + + def __init__(self, ansible_adhoc, hostname, is_local=False): + if is_local: + self.host = ansible_adhoc(inventory='localhost', connection='local')[hostname] + else: + self.host = ansible_adhoc(become=True)[hostname] + self.hostname = hostname + + def __getattr__(self, item): + self.module_name = item + self.module = getattr(self.host, item) + + return self._run + + def _run(self, *module_args, **complex_args): + + module_ignore_errors = complex_args.pop('module_ignore_errors', False) + + res = self.module(*module_args, **complex_args)[self.hostname] + if res.is_failed and not module_ignore_errors: + raise AnsibleModuleException("run module {} failed".format(self.module_name), res) + + return res diff --git a/tests/bgp_speaker/announce_routes.py b/tests/bgp_speaker/announce_routes.py new file mode 100644 index 00000000000..e13849ac5b3 --- /dev/null +++ 
b/tests/bgp_speaker/announce_routes.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +import cPickle +import os +import time +import sys + +with open(sys.argv[1]) as f: + routes = f.readlines() + +routes=[x.strip() for x in routes] +ports = set() + +for route in routes: + [command, port] = route.split(";") + port = port.strip() + ports.add(port) + os.system('curl -s --form "command=%s" http://localhost:%s/' % (command, port)) + +for n in range(0, 20): + time.sleep(10) + for port in ports: + os.system('curl -s --form "command=flush route" http://localhost:%s/' % port) + diff --git a/tests/bgp_speaker/bgp_speaker_route.j2 b/tests/bgp_speaker/bgp_speaker_route.j2 new file mode 100644 index 00000000000..1403d5042fa --- /dev/null +++ b/tests/bgp_speaker/bgp_speaker_route.j2 @@ -0,0 +1,3 @@ +0.0.0.0/0 {% for portchannel, v in minigraph_portchannels.iteritems() %}[{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} + +{{announce_prefix}} {% for vlan, v in minigraph_vlans.iteritems() %}{% for member in v.members %}[{{ '%d' % minigraph_port_indices[member]}}]{% if not loop.last %} {% endif %}{% endfor %}{% if not loop.last %} {% endif %}{% endfor %} diff --git a/tests/bgp_speaker/config.j2 b/tests/bgp_speaker/config.j2 new file mode 100644 index 00000000000..e890cd669e4 --- /dev/null +++ b/tests/bgp_speaker/config.j2 @@ -0,0 +1,24 @@ +group exabgp { + process dump { + encoder json; + receive { + parsed; + update; + } + run /usr/bin/python {{ helper_dir }}/dump.py; + } + + process http-api { + run /usr/bin/python {{ helper_dir }}/http_api.py {{ port_num[cidx] }}; + } + + neighbor {{ lo_addr }} { + router-id {{ speaker_ip }}; + local-address {{ speaker_ip }}; + peer-as {{ peer_asn }}; + local-as {{ my_asn }}; + auto-flush false; + group-updates true; + } + +} diff --git a/tests/bgp_speaker/dump.py b/tests/bgp_speaker/dump.py new file mode 100644 index 
00000000000..4bca81fe6a7 --- /dev/null +++ b/tests/bgp_speaker/dump.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python + +from sys import stdin +import json +import os +import sys + +while True: + try: + line = stdin.readline() + obj = json.loads(line) + f = open("/root/exabgp/" + obj["neighbor"]["ip"], "a") + print >> f, line, + f.close() + except: + continue diff --git a/tests/bgp_speaker/http_api.py b/tests/bgp_speaker/http_api.py new file mode 100644 index 00000000000..fd719b28c86 --- /dev/null +++ b/tests/bgp_speaker/http_api.py @@ -0,0 +1,16 @@ +from flask import Flask, request +import sys + +app = Flask(__name__) + +# Setup a command route to listen for prefix advertisements +@app.route('/', methods=['POST']) +def run_command(): + command = request.form['command'] + sys.stdout.write('%s\n' % command) + sys.stdout.flush() + return 'OK\n' + +if __name__ == '__main__': + app.run(port=sys.argv[1]) + diff --git a/tests/bgp_speaker/routes.j2 b/tests/bgp_speaker/routes.j2 new file mode 100644 index 00000000000..01768f948ad --- /dev/null +++ b/tests/bgp_speaker/routes.j2 @@ -0,0 +1,3 @@ +neighbor {{ lo_addr }} announce route {{ announce_prefix }} next-hop {{ vlan_ips[1].split('/')[0] }};{{ port_num[0] }} +neighbor {{ lo_addr }} announce route {{ announce_prefix }} next-hop {{ vlan_ips[2].split('/')[0] }};{{ port_num[1] }} +neighbor {{ lo_addr }} announce route {{ peer_range }} next-hop {{ vlan_ips[0].split('/')[0] }};{{ port_num[2] }} diff --git a/tests/bgp_speaker/start.j2 b/tests/bgp_speaker/start.j2 new file mode 100644 index 00000000000..45ff79cbc62 --- /dev/null +++ b/tests/bgp_speaker/start.j2 @@ -0,0 +1,20 @@ +#!/bin/bash -ex + +ifconfig eth{{ '%d' % (vlan_ports[0]) }} {{ vlan_ips[0] }} +ifconfig eth{{ '%d' % (vlan_ports[0]) }}:0 {{ speaker_ips[0] }} +ifconfig eth{{ '%d' % (vlan_ports[0]) }}:1 {{ speaker_ips[1] }} + +{% set intf = 'eth%d' % (vlan_ports[1]) %} +ifconfig {{intf}} {{ vlan_ips[1] }} +# i=0; until [ $i -eq 10 ] || ping {{ vlan_addr }} -I {{intf}} -c 1 
>/dev/null 2>&1; do i=`expr $i + 1`; done & + +{% set intf = 'eth%d' % (vlan_ports[2]) %} +ifconfig {{intf}} {{ vlan_ips[2] }} +# i=0; until [ $i -eq 10 ] || ping {{ vlan_addr }} -I {{intf}} -c 1 >/dev/null 2>&1; do i=`expr $i + 1`; done & + +ip route flush {{ lo_addr }}/{{ lo_addr_prefixlen }} +ip route add {{ lo_addr }}/{{ lo_addr_prefixlen }} via {{ vlan_addr }} + +env exabgp.daemon.user=root nohup exabgp {{ exabgp_dir }}/{{ cfnames[0] }} >/dev/null 2>&1 & +env exabgp.daemon.user=root nohup exabgp {{ exabgp_dir }}/{{ cfnames[1] }} >/dev/null 2>&1 & +env exabgp.daemon.user=root nohup exabgp {{ exabgp_dir }}/{{ cfnames[2] }} >/dev/null 2>&1 & diff --git a/tests/common/__init__.py b/tests/common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/common/devices.py b/tests/common/devices.py new file mode 100644 index 00000000000..9aac450cbb0 --- /dev/null +++ b/tests/common/devices.py @@ -0,0 +1,195 @@ +""" +Classes for various devices that may be used in testing. + +There are other options for interacting with the devices used in testing, for example netmiko, fabric. +We have a big number of customized ansible modules in the sonic-mgmt/ansible/library folder. To reused these +modules, we have no other choice, at least for interacting with SONiC, localhost and PTF. + +We can consider using netmiko for interacting with the VMs used in testing. +""" +import json +import logging +from multiprocessing import Process, Queue + +from errors import RunAnsibleModuleFail +from errors import UnsupportedAnsibleModule + +class AnsibleHostBase(object): + """ + @summary: The base class for various objects. + + This class filters an object from the ansible_adhoc fixture by hostname. The object can be considered as an + ansible host object although it is not under the hood. Anyway, we can use this object to run ansible module + on the host. 
+ """ + + def __init__(self, ansible_adhoc, hostname): + if hostname == 'localhost': + self.host = ansible_adhoc(inventory='localhost', connection='local', host_pattern=hostname)[hostname] + else: + self.host = ansible_adhoc(become=True)[hostname] + self.hostname = hostname + + def __getattr__(self, item): + if self.host.has_module(item): + self.module_name = item + self.module = getattr(self.host, item) + + return self._run + else: + raise UnsupportedAnsibleModule("Unsupported module") + + def _run(self, *module_args, **complex_args): + module_ignore_errors = complex_args.pop('module_ignore_errors', False) + module_async = complex_args.pop('module_async', False) + + if module_async: + q = Queue() + def run_module(queue, module_args, complex_args): + res = self.module(*module_args, **complex_args) + q.put(res[self.hostname]) + p = Process(target=run_module, args=(q, module_args, complex_args)) + p.start() + return p, q + + res = self.module(*module_args, **complex_args)[self.hostname] + if res.is_failed and not module_ignore_errors: + raise RunAnsibleModuleFail("run module {} failed, errmsg {}".format(self.module_name, res)) + + return res + + +class Localhost(AnsibleHostBase): + """ + @summary: Class for localhost + + For running ansible module on localhost + """ + def __init__(self, ansible_adhoc): + AnsibleHostBase.__init__(self, ansible_adhoc, "localhost") + + +class PTFHost(AnsibleHostBase): + """ + @summary: Class for PTF + + Instance of this class can run ansible modules on the PTF host. 
+ """ + def __init__(self, ansible_adhoc, hostname): + AnsibleHostBase.__init__(self, ansible_adhoc, hostname) + + # TODO: Add a method for running PTF script + + +class SonicHost(AnsibleHostBase): + """ + @summary: Class for SONiC switch + + For running ansible module on the SONiC switch + """ + CRITICAL_SERVICES = ["swss", "syncd", "database", "teamd", "bgp", "pmon", "lldp"] + + def __init__(self, ansible_adhoc, hostname, gather_facts=False): + AnsibleHostBase.__init__(self, ansible_adhoc, hostname) + if gather_facts: + self.gather_facts() + + def _platform_info(self): + platform_info = self.command("show platform summary")["stdout_lines"] + for line in platform_info: + if line.startswith("Platform:"): + self.facts["platform"] = line.split(":")[1].strip() + elif line.startswith("HwSKU:"): + self.facts["hwsku"] = line.split(":")[1].strip() + elif line.startswith("ASIC:"): + self.facts["asic_type"] = line.split(":")[1].strip() + + def gather_facts(self): + """ + @summary: Gather facts of the SONiC switch and store the gathered facts in the dict type 'facts' attribute. + """ + self.facts = {} + self._platform_info() + logging.debug("SonicHost facts: %s" % json.dumps(self.facts)) + + def get_service_props(self, service, props=["ActiveState", "SubState"]): + """ + @summary: Use 'systemctl show' command to get detailed properties of a service. By default, only get + ActiveState and SubState of the service. + @param service: Service name. + @param props: Properties of the service to be shown. 
+ @return: Returns a dictionary containing properties of the specified service, for example: + { + "ActiveState": "active", + "SubState": "running" + } + """ + props = " ".join(["-p %s" % prop for prop in props]) + output = self.command("systemctl %s show %s" % (props, service)) + result = {} + for line in output["stdout_lines"]: + fields = line.split("=") + if len(fields) >= 2: + result[fields[0]] = fields[1] + return result + + def is_service_fully_started(self, service): + """ + @summary: Check whether a SONiC specific service is fully started. + + The last step in the starting script of all SONiC services is to run "docker wait SERVICE". This command + will not exit unless the docker container of the service is stopped. We use this trick to determine whether + a SONiC service has completed starting. + + @param service: Name of the SONiC service + """ + try: + output = self.command('pgrep -f "docker wait %s"' % service) + if output["stdout_lines"]: + return True + else: + return False + except: + return False + + def critical_services_fully_started(self): + """ + @summary: Check whether all the SONiC critical services have started + """ + result = {} + for service in self.CRITICAL_SERVICES: + result[service] = self.is_service_fully_started(service) + + logging.debug("Status of critical services: %s" % str(result)) + return all(result.values()) + + + def get_crm_resources(self): + """ + @summary: Run the "crm show resources all" command and parse its output + """ + result = {"main_resources": {}, "acl_resources": [], "table_resources": []} + output = self.command("crm show resources all")["stdout_lines"] + current_table = 0 # Totally 3 tables in the command output + for line in output: + if len(line.strip()) == 0: + continue + if "---" in line: + current_table += 1 + continue + if current_table == 1: # content of first table, main resources + fields = line.split() + if len(fields) == 3: + result["main_resources"][fields[0]] = {"used": int(fields[1]), "available":
int(fields[2])} + if current_table == 2: # content of the second table, acl resources + fields = line.split() + if len(fields) == 5: + result["acl_resources"].append({"stage": fields[0], "bind_point": fields[1], + "resource_name": fields[2], "used_count": int(fields[3]), "available_count": int(fields[4])}) + if current_table == 3: # content of the third table, table resources + fields = line.split() + if len(fields) == 4: + result["table_resources"].append({"table_id": fields[0], "resource_name": fields[1], + "used_count": int(fields[2]), "available_count": int(fields[3])}) + + return result diff --git a/tests/common/errors.py b/tests/common/errors.py new file mode 100644 index 00000000000..25a2397a6df --- /dev/null +++ b/tests/common/errors.py @@ -0,0 +1,8 @@ +""" +Customize exceptions +""" +class UnsupportedAnsibleModule(Exception): + pass + +class RunAnsibleModuleFail(Exception): + pass diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py new file mode 100644 index 00000000000..7ef9aa424bd --- /dev/null +++ b/tests/common/mellanox_data.py @@ -0,0 +1,142 @@ + +SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010"] +SPC2_HWSKUS = ["ACS-MSN3700", "ACS-MSN3700C", "ACS-MSN3800"] +SWITCH_HWSKUS = SPC1_HWSKUS + SPC2_HWSKUS + +SWITCH_MODELS = { + "ACS-MSN2700": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN2740": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN2410": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + 
}, + "ACS-MSN2010": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": False + }, + "psus": { + "number": 2, + "hot_swappable": False + } + }, + "ACS-MSN2100": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": False + }, + "psus": { + "number": 2, + "hot_swappable": False + } + }, + "ACS-MSN3800": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 3, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3700": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 6, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3700C": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3510": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 6, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + } +} diff --git a/tests/common/utilities.py b/tests/common/utilities.py new file mode 100644 index 00000000000..85a32204352 --- /dev/null +++ b/tests/common/utilities.py @@ -0,0 +1,52 @@ +""" +Utility functions can re-used in testing scripts. 
+""" +import time +import logging + + +def wait(seconds, msg=""): + """ + @summary: Pause specified number of seconds + @param seconds: Number of seconds to pause + @param msg: Optional extra message for pause reason + """ + logging.debug("Pause %d seconds, reason: %s" % (seconds, msg)) + time.sleep(seconds) + + +def wait_until(timeout, interval, condition, *args, **kwargs): + """ + @summary: Wait until the specified condition is True or timeout. + @param timeout: Maximum time to wait + @param interval: Poll interval + @param condition: A function that returns False or True + @param *args: Extra args required by the 'condition' function. + @param **kwargs: Extra args required by the 'condition' function. + @return: If the condition function returns True before timeout, return True. If the condition function raises an + exception, log the error and keep waiting and polling. + """ + logging.debug("Wait until %s is True, timeout is %s seconds, checking interval is %s" % \ + (condition.__name__, timeout, interval)) + start_time = time.time() + elapsed_time = 0 + while elapsed_time < timeout: + logging.debug("Time elapsed: %f seconds" % elapsed_time) + + try: + check_result = condition(*args, **kwargs) + except Exception as e: + logging.debug("Exception caught while checking %s: %s" % (condition.__name__, repr(e))) + check_result = False + + if check_result: + logging.debug("%s is True, exit early with True" % condition.__name__) + return True + else: + logging.debug("%s is False, wait %d seconds and check again" % (condition.__name__, interval)) + time.sleep(interval) + elapsed_time = time.time() - start_time + + if elapsed_time >= timeout: + logging.debug("%s is still False after %d seconds, exit with False" % (condition.__name__, timeout)) + return False diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000000..2701fdf06fd --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,123 @@ +import sys +import os + +import pytest +import csv 
+import yaml +import ipaddr as ipaddress + +from ansible_host import AnsibleHost + +pytest_plugins = ('ptf_fixtures', 'ansible_fixtures') + +# Add the tests folder to sys.path, for importing the lib package +_current_file_dir = os.path.dirname(os.path.realpath(__file__)) +if _current_file_dir not in sys.path: + sys.path.append(_current_file_dir) + + +class TestbedInfo(object): + """ + Parse the CSV file used to describe whole testbed info + Please refer to the example of the CSV file format + CSV file first line is title + The topology name in title is using uniq-name | conf-name + """ + + def __init__(self, testbed_file): + self.testbed_filename = testbed_file + self.testbed_topo = {} + + with open(self.testbed_filename) as f: + topo = csv.DictReader(f) + for line in topo: + tb_prop = {} + name = '' + for key in line: + if ('uniq-name' in key or 'conf-name' in key) and '#' in line[key]: + ### skip comment line + continue + elif 'uniq-name' in key or 'conf-name' in key: + name = line[key] + elif 'ptf_ip' in key and line[key]: + ptfaddress = ipaddress.IPNetwork(line[key]) + tb_prop['ptf_ip'] = str(ptfaddress.ip) + tb_prop['ptf_netmask'] = str(ptfaddress.netmask) + else: + tb_prop[key] = line[key] + if name: + self.testbed_topo[name] = tb_prop + + +def pytest_addoption(parser): + parser.addoption("--testbed", action="store", default=None, help="testbed name") + parser.addoption("--testbed_file", action="store", default=None, help="testbed file name") + + +@pytest.fixture(scope="session") +def testbed(request): + """ + Create and return testbed information + """ + tbname = request.config.getoption("--testbed") + tbfile = request.config.getoption("--testbed_file") + if tbname is None or tbfile is None: + raise ValueError("testbed and testbed_file are required!") + + tbinfo = TestbedInfo(tbfile) + return tbinfo.testbed_topo[tbname] + + +@pytest.fixture(scope="module") +def testbed_devices(ansible_adhoc, testbed): + """ + @summary: Fixture for creating dut, localhost and
other necessary objects for testing. These objects provide + interfaces for interacting with the devices used in testing. + @param ansible_adhoc: Fixture provided by the pytest-ansible package. Source of the various device objects. It is + mandatory argument for the class constructors. + @param testbed: Fixture for parsing testbed configuration file. + @return: Return the created device objects in a dictionary + """ + from common.devices import SonicHost, Localhost + + devices = {} + devices["localhost"] = Localhost(ansible_adhoc) + devices["dut"] = SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True) + if "ptf" in testbed: + devices["ptf"] = PTFHost(ansible_adhoc, testbed["ptf"]) + + # In the future, we can implement more classes for interacting with other testbed devices in the lib.devices + # module. Then, in this fixture, we can initialize more instance of the classes and store the objects in the + # devices dict here. For example, we could have + # from common.devices import FanoutHost + # devices["fanout"] = FanoutHost(ansible_adhoc, testbed["dut"]) + + return devices + + +@pytest.fixture(scope="module") +def duthost(ansible_adhoc, testbed): + """ + Shortcut fixture for getting DUT host + """ + + hostname = testbed['dut'] + return AnsibleHost(ansible_adhoc, hostname) + + +@pytest.fixture(scope="module") +def ptfhost(ansible_adhoc, testbed): + """ + Shortcut fixture for getting PTF host + """ + + hostname = testbed['ptf'] + return AnsibleHost(ansible_adhoc, hostname) + + +@pytest.fixture(scope='session') +def eos(): + """ read and yield eos configuration """ + with open('eos/eos.yml') as stream: + eos = yaml.safe_load(stream) + return eos diff --git a/tests/eos b/tests/eos new file mode 100644 index 00000000000..798ba784963 --- /dev/null +++ b/tests/eos @@ -0,0 +1 @@ +../ansible/group_vars/eos/ \ No newline at end of file diff --git a/tests/fdb/conftest.py b/tests/fdb/conftest.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/fdb/test_fdb.py b/tests/fdb/test_fdb.py new file mode 100644 index 00000000000..b3c39873bff --- /dev/null +++ b/tests/fdb/test_fdb.py @@ -0,0 +1,159 @@ +from ansible_host import AnsibleHost + +import pytest +import ptf.testutils as testutils + +import time +import itertools +import logging +import pprint + +DEFAULT_FDB_ETHERNET_TYPE = 0x1234 +DUMMY_MAC_PREFIX = "02:11:22:33" +DUMMY_MAC_COUNT = 10 +FDB_POPULATE_SLEEP_TIMEOUT = 2 + +logger = logging.getLogger(__name__) + + +def send_eth(ptfadapter, source_port, source_mac, dest_mac): + """ + send ethernet packet + :param ptfadapter: PTF adapter object + :param source_port: source port + :param source_mac: source MAC + :param dest_mac: destination MAC + :return: + """ + pkt = testutils.simple_eth_packet( + eth_dst=dest_mac, + eth_src=source_mac, + eth_type=DEFAULT_FDB_ETHERNET_TYPE + ) + logger.debug('send packet source port id {} smac: {} dmac: {}'.format(source_port, source_mac, dest_mac)) + testutils.send(ptfadapter, source_port, pkt) + + +def send_recv_eth(ptfadapter, source_port, source_mac, dest_port, dest_mac): + """ + send ethernet packet and verify it on dest_port + :param ptfadapter: PTF adapter object + :param source_port: source port + :param source_mac: source MAC + :param dest_port: destination port to receive packet on + :param dest_mac: destination MAC + :return: + """ + pkt = testutils.simple_eth_packet( + eth_dst=dest_mac, + eth_src=source_mac, + eth_type=DEFAULT_FDB_ETHERNET_TYPE + ) + logger.debug('send packet src port {} smac: {} dmac: {} verifying on dst port {}'.format( + source_port, source_mac, dest_mac, dest_port)) + testutils.send(ptfadapter, source_port, pkt) + testutils.verify_packet_any_port(ptfadapter, pkt, [dest_port]) + + +def setup_fdb(ptfadapter, vlan_table, router_mac): + """ + :param ptfadapter: PTF adapter object + :param vlan_table: VLAN table map: VLAN subnet -> list of VLAN members + :return: FDB table map : VLAN member -> MAC addresses set + """ + + fdb = {} + + for 
vlan in vlan_table: + for member in vlan_table[vlan]: + mac = ptfadapter.dataplane.get_mac(0, member) + # send a packet to switch to populate layer 2 table with MAC of PTF interface + send_eth(ptfadapter, member, mac, router_mac) + + # put in learned MAC + fdb[member] = { mac } + + # Send packets to switch to populate the layer 2 table with dummy MACs for each port + # Totally 10 dummy MACs for each port, send 1 packet for each dummy MAC + dummy_macs = ['{}:{:02x}:{:02x}'.format(DUMMY_MAC_PREFIX, member, i) + for i in range(DUMMY_MAC_COUNT)] + + for dummy_mac in dummy_macs: + send_eth(ptfadapter, member, dummy_mac, router_mac) + + # put in set learned dummy MACs + fdb[member].update(dummy_macs) + + time.sleep(FDB_POPULATE_SLEEP_TIMEOUT) + + return fdb + + +@pytest.fixture +def fdb_cleanup(ansible_adhoc, testbed): + """ cleanup FDB before and after test run """ + duthost = AnsibleHost(ansible_adhoc, testbed['dut']) + try: + duthost.command('sonic-clear fdb all') + yield + finally: + # in any case clear fdb after test + duthost.command('sonic-clear fdb all') + + +@pytest.mark.usefixtures('fdb_cleanup') +def test_fdb(ansible_adhoc, testbed, ptfadapter): + """ + 1. verify fdb forwarding in T0 topology. + 2. verify show mac command on DUT for learned mac. 
+ """ + + if testbed['topo'] not in ['t0', 't0-64', 't0-116']: + pytest.skip('unsupported testbed type') + + duthost = AnsibleHost(ansible_adhoc, testbed['dut']) + ptfhost = AnsibleHost(ansible_adhoc, testbed['ptf']) + + host_facts = duthost.setup()['ansible_facts'] + mg_facts = duthost.minigraph_facts(host=duthost.hostname)['ansible_facts'] + + # remove existing IPs from PTF host + ptfhost.script('scripts/remove_ip.sh') + # set unique MACs to PTF interfaces + ptfhost.script('scripts/change_mac.sh') + # reinitialize data plane due to above changes on PTF interfaces + ptfadapter.reinit() + + router_mac = host_facts['ansible_Ethernet0']['macaddress'] + vlan_member_count = sum([len(v['members']) for k, v in mg_facts['minigraph_vlans'].items()]) + + vlan_table = {} + for vlan in mg_facts['minigraph_vlan_interfaces']: + vlan_table[vlan['subnet']] = [] + for ifname in mg_facts['minigraph_vlans'][vlan['attachto']]['members']: + vlan_table[vlan['subnet']].append(mg_facts['minigraph_port_indices'][ifname]) + + fdb = setup_fdb(ptfadapter, vlan_table, router_mac) + + for vlan in vlan_table: + for src, dst in itertools.combinations(vlan_table[vlan], 2): + for src_mac, dst_mac in itertools.product(fdb[src], fdb[dst]): + send_recv_eth(ptfadapter, src, src_mac, dst, dst_mac) + + # Should we have fdb_facts ansible module for this test? 
+ res = duthost.command('show mac') + logger.info('"show mac" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) + + dummy_mac_count = 0 + total_mac_count = 0 + for l in res['stdout_lines']: + if DUMMY_MAC_PREFIX in l.lower(): + dummy_mac_count += 1 + if "dynamic" in l.lower(): + total_mac_count += 1 + + # Verify that the number of dummy MAC entries is expected + assert dummy_mac_count == DUMMY_MAC_COUNT * vlan_member_count + # Verify that total number of MAC entries is expected + assert total_mac_count == DUMMY_MAC_COUNT * vlan_member_count + vlan_member_count + diff --git a/tests/platform/check_critical_services.py b/tests/platform/check_critical_services.py new file mode 100644 index 00000000000..28162d0ca85 --- /dev/null +++ b/tests/platform/check_critical_services.py @@ -0,0 +1,27 @@ +""" +Helper script for checking status of critical services + +This script contains re-usable functions for checking status of critical services. +""" +import time +import logging + +from common.utilities import wait_until + + +def check_critical_services(dut): + """ + @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all + services must be "active". SubState of all services must be "running". + @param dut: The AnsibleHost object of DUT. For interacting with DUT. 
+ """ + logging.info("Wait until all critical services are fully started") + assert wait_until(300, 20, dut.critical_services_fully_started), "Not all critical services are fully started" + + logging.info("Check critical service status") + for service in dut.CRITICAL_SERVICES: + status = dut.get_service_props(service) + assert status["ActiveState"] == "active", \ + "ActiveState of %s is %s, expected: active" % (service, status["ActiveState"]) + assert status["SubState"] == "running", \ + "SubState of %s is %s, expected: active" % (service, status["SubState"]) diff --git a/tests/platform/check_interface_status.py b/tests/platform/check_interface_status.py new file mode 100644 index 00000000000..a2aa4a4c578 --- /dev/null +++ b/tests/platform/check_interface_status.py @@ -0,0 +1,57 @@ +""" +Helper script for checking status of interfaces + +This script contains re-usable functions for checking status of interfaces on SONiC. +""" +import logging + + +def parse_intf_status(lines): + """ + @summary: Parse the output of command "intfutil description". + @param lines: The output lines of command "intfutil description". + @return: Return a dictionary like: + { + "Ethernet0": { + "oper": "up", + "admin": "up", + "alias": "etp1", + "desc": "ARISTA01T2:Ethernet1" + }, + ... + } + """ + result = {} + for line in lines: + fields = line.split() + if len(fields) >= 5: + intf = fields[0] + oper, admin, alias, desc = fields[1], fields[2], fields[3], ' '.join(fields[4:]) + result[intf] = {"oper": oper, "admin": admin, "alias": alias, "desc": desc} + return result + + +def check_interface_status(dut, interfaces): + """ + @summary: Check the admin and oper status of the specified interfaces on DUT. + @param dut: The AnsibleHost object of DUT. For interacting with DUT. + @param hostname: + @param interfaces: List of interfaces that need to be checked. 
+ """ + logging.info("Check interface status using cmd 'intfutil'") + mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"] + output = dut.command("intfutil description") + intf_status = parse_intf_status(output["stdout_lines"][2:]) + for intf in interfaces: + expected_oper = "up" if intf in mg_ports else "down" + expected_admin = "up" if intf in mg_ports else "down" + assert intf in intf_status, "Missing status for interface %s" % intf + assert intf_status[intf]["oper"] == expected_oper, \ + "Oper status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["oper"], expected_oper) + assert intf_status[intf]["admin"] == expected_oper, \ + "Admin status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["admin"], expected_admin) + + logging.info("Check interface status using the interface_facts module") + intf_facts = dut.interface_facts(up_ports=mg_ports)["ansible_facts"] + down_ports = intf_facts["ansible_interface_link_down_ports"] + assert len(down_ports) == 0, "Some interfaces are down: %s" % str(down_ports) diff --git a/tests/platform/check_transceiver_status.py b/tests/platform/check_transceiver_status.py new file mode 100644 index 00000000000..a2a87e0ffad --- /dev/null +++ b/tests/platform/check_transceiver_status.py @@ -0,0 +1,119 @@ +""" +Helper script for checking status of transceivers + +This script contains re-usable functions for checking status of transceivers. 
+""" +import logging +import re +import json + + +def parse_transceiver_info(output_lines): + """ + @summary: Parse the list of transceiver from DB table TRANSCEIVER_INFO content + @param output_lines: DB table TRANSCEIVER_INFO content output by 'redis' command + @return: Return parsed transceivers in a list + """ + result = [] + p = re.compile(r"TRANSCEIVER_INFO\|(Ethernet\d+)") + for line in output_lines: + m = p.match(line) + assert m, "Unexpected line %s" % line + result.append(m.group(1)) + return result + + +def parse_transceiver_dom_sensor(output_lines): + """ + @summary: Parse the list of transceiver from DB table TRANSCEIVER_DOM_SENSOR content + @param output_lines: DB table TRANSCEIVER_DOM_SENSOR content output by 'redis' command + @return: Return parsed transceivers in a list + """ + result = [] + p = re.compile(r"TRANSCEIVER_DOM_SENSOR\|(Ethernet\d+)") + for line in output_lines: + m = p.match(line) + assert m, "Unexpected line %s" % line + result.append(m.group(1)) + return result + + +def all_transceivers_detected(dut, interfaces): + """ + Check if transceiver information of all the specified interfaces have been detected. + """ + db_output = dut.command("redis-cli --raw -n 6 keys TRANSCEIVER_INFO\*")["stdout_lines"] + not_detected_interfaces = [intf for intf in interfaces if "TRANSCEIVER_INFO|%s" % intf not in db_output] + if len(not_detected_interfaces) > 0: + logging.debug("Interfaces not detected: %s" % str(not_detected_interfaces)) + return False + return True + + +def check_transceiver_basic(dut, interfaces): + """ + @summary: Check whether all the specified interface are in TRANSCEIVER_INFO redis DB. + @param dut: The AnsibleHost object of DUT. For interacting with DUT. + @param interfaces: List of interfaces that need to be checked. 
+ """ + logging.info("Check whether transceiver information of all ports are in redis") + xcvr_info = dut.command("redis-cli -n 6 keys TRANSCEIVER_INFO*") + parsed_xcvr_info = parse_transceiver_info(xcvr_info["stdout_lines"]) + for intf in interfaces: + assert intf in parsed_xcvr_info, "TRANSCEIVER INFO of %s is not found in DB" % intf + + +def check_transceiver_details(dut, interfaces): + """ + @summary: Check the detailed TRANSCEIVER_INFO content of all the specified interfaces. + @param dut: The AnsibleHost object of DUT. For interacting with DUT. + @param interfaces: List of interfaces that need to be checked. + """ + logging.info("Check detailed transceiver information of each connected port") + expected_fields = ["type", "hardwarerev", "serialnum", "manufacturename", "modelname"] + for intf in interfaces: + port_xcvr_info = dut.command('redis-cli -n 6 hgetall "TRANSCEIVER_INFO|%s"' % intf) + for field in expected_fields: + assert port_xcvr_info["stdout"].find(field) >= 0, \ + "Expected field %s is not found in %s while checking %s" % (field, port_xcvr_info["stdout"], intf) + + +def check_transceiver_dom_sensor_basic(dut, interfaces): + """ + @summary: Check whether all the specified interface are in TRANSCEIVER_DOM_SENSOR redis DB. + @param dut: The AnsibleHost object of DUT. For interacting with DUT. + @param interfaces: List of interfaces that need to be checked. + """ + logging.info("Check whether TRANSCEIVER_DOM_SENSOR of all ports in redis") + xcvr_dom_sensor = dut.command("redis-cli -n 6 keys TRANSCEIVER_DOM_SENSOR*") + parsed_xcvr_dom_sensor = parse_transceiver_dom_sensor(xcvr_dom_sensor["stdout_lines"]) + for intf in interfaces: + assert intf in parsed_xcvr_dom_sensor, "TRANSCEIVER_DOM_SENSOR of %s is not found in DB" % intf + + +def check_transceiver_dom_sensor_details(dut, interfaces): + """ + @summary: Check the detailed TRANSCEIVER_DOM_SENSOR content of all the specified interfaces. + @param dut: The AnsibleHost object of DUT. 
For interacting with DUT. + @param interfaces: List of interfaces that need to be checked. + """ + logging.info("Check detailed TRANSCEIVER_DOM_SENSOR information of each connected ports") + expected_fields = ["temperature", "voltage", "rx1power", "rx2power", "rx3power", "rx4power", "tx1bias", + "tx2bias", "tx3bias", "tx4bias", "tx1power", "tx2power", "tx3power", "tx4power"] + for intf in interfaces: + port_xcvr_dom_sensor = dut.command('redis-cli -n 6 hgetall "TRANSCEIVER_DOM_SENSOR|%s"' % intf) + for field in expected_fields: + assert port_xcvr_dom_sensor["stdout"].find(field) >= 0, \ + "Expected field %s is not found in %s while checking %s" % (field, port_xcvr_dom_sensor["stdout"], intf) + + +def check_transceiver_status(dut, interfaces): + """ + @summary: Check transceiver information of all the specified interfaces in redis DB. + @param dut: The AnsibleHost object of DUT. For interacting with DUT. + @param interfaces: List of interfaces that need to be checked. + """ + check_transceiver_basic(dut, interfaces) + check_transceiver_details(dut, interfaces) + check_transceiver_dom_sensor_basic(dut, interfaces) + check_transceiver_dom_sensor_details(dut, interfaces) diff --git a/tests/platform/mellanox/check_sysfs.py b/tests/platform/mellanox/check_sysfs.py new file mode 100644 index 00000000000..364e977dfa5 --- /dev/null +++ b/tests/platform/mellanox/check_sysfs.py @@ -0,0 +1,117 @@ +""" +Helper script for checking status of sysfs. + +This script contains re-usable functions for checking status of hw-management related sysfs. 
+""" +import logging +import json + + +def check_sysfs_broken_symbolinks(dut): + logging.info("Check broken symbolinks") + excludes = [ + "/bsp/thermal_zone/thermal_zone2-x86_pkg_temp/mode", + "/bsp/environment/voltmon1", + "/bsp/environment/voltmon2", + "/bsp/led/", + "/bsp/qsfp/" + ] + + broken_symbolinks = dut.command("find /bsp -xtype l")["stdout_lines"] + broken_symbolinks = [line for line in broken_symbolinks if not any([line.startswith(ex) for ex in excludes])] + assert len(broken_symbolinks) == 0, \ + "Found some broken symbolinks: %s" % str(broken_symbolinks) + +def check_sysfs_thermal(dut): + logging.info("Check thermal") + file_asic = dut.command("cat /bsp/thermal/asic") + try: + asic_temp = float(file_asic["stdout"]) / 1000 + assert asic_temp > 0 and asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] + except: + assert False, "Bad content in /bsp/thermal/asic: %s" % file_asic["stdout"] + + +def check_sysfs_fan(dut): + logging.info("Check fan") + + from common.mellanox_data import SWITCH_MODELS + fan_count = SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["number"] + + if SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["hot_swappable"]: + fan_status_list = ["/bsp/module/fan%d_status" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_status in fan_status_list: + fan_status_content = dut.command("cat %s" % fan_status) + assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status + + fan_min_list = ["/bsp/fan/fan%d_min" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_min in fan_min_list: + try: + fan_min_content = dut.command("cat %s" % fan_min) + fan_min_speed = int(fan_min_content["stdout"]) + assert fan_min_speed > 0, "Bad fan minimum speed: %s" % str(fan_min_speed) + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_min, repr(e)) + + fan_max_list = ["/bsp/fan/fan%d_max" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_max in fan_max_list: + try: + 
fan_max_content = dut.command("cat %s" % fan_max) + fan_max_speed = int(fan_max_content["stdout"]) + assert fan_max_speed > 10000, "Bad fan maximum speed: %s" % str(fan_max_speed) + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_max, repr(e)) + + fan_speed_get_list = ["/bsp/fan/fan%d_speed_get" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_speed_get in fan_speed_get_list: + try: + fan_speed_get_content = dut.command("cat %s" % fan_speed_get) + fan_speed = int(fan_speed_get_content["stdout"]) + assert fan_speed > 1000, "Bad fan speed: %s" % str(fan_speed) + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_speed_get, repr(e)) + + fan_speed_set_list = ["/bsp/fan/fan%d_speed_set" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_speed_set in fan_speed_set_list: + fan_speed_set_content = dut.command("cat %s" % fan_speed_set) + assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" + + +def check_sysfs_psu(dut): + logging.info("Check psu") + + from common.mellanox_data import SWITCH_MODELS + psu_count = SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["number"] + + if SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["hot_swappable"]: + psu_status_list = ["/bsp/module/psu%d_status" % psu_id for psu_id in range(1, psu_count + 1)] + for psu_status in psu_status_list: + psu_status_content = dut.command("cat %s" % psu_status) + assert psu_status_content["stdout"] == "1", "Content of %s is not 1" % psu_status + + +def check_sysfs_qsfp(dut, interfaces): + logging.info("Check qsfp status") + ports_config = json.loads(dut.command("sonic-cfggen -d --var-json PORT")["stdout"]) + + for intf in interfaces: + intf_lanes = ports_config[intf]["lanes"] + sfp_id = int(intf_lanes.split(",")[0])/4 + 1 + qsfp_status_file = "/bsp/qsfp/qsfp%d_status" % sfp_id + assert dut.command("cat %s" % qsfp_status_file)["stdout"] == "1", \ + "Content of %s should be '1'" % 
qsfp_status_file + + +def check_sysfs(dut, interfaces): + """ + @summary: Check various hw-management related sysfs under /var/run/hw-management + """ + check_sysfs_broken_symbolinks(dut) + + check_sysfs_thermal(dut) + + check_sysfs_fan(dut) + + check_sysfs_psu(dut) + + check_sysfs_qsfp(dut, interfaces) diff --git a/tests/platform/mellanox/mellanox_psu_controller.py b/tests/platform/mellanox/mellanox_psu_controller.py new file mode 100644 index 00000000000..683d6729b6e --- /dev/null +++ b/tests/platform/mellanox/mellanox_psu_controller.py @@ -0,0 +1,269 @@ +""" +Mellanox specific PSU controller + +This script contains illustrative functions and class for creating PSU controller based on Mellanox lab configuration. + +Some actual configurations were or replaced with dummy configurations. +""" +import logging +import subprocess + +import paramiko + +from psu_controller import PsuControllerBase + + +def run_local_cmd(cmd): + """ + @summary: Helper function for run command on localhost -- the sonic-mgmt container + @param cmd: Command to be executed + @return: Returns whatever output to stdout by the command + @raise: Raise an exception if the command return code is not 0. + """ + process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + ret_code = process.returncode + + if ret_code != 0: + raise Exception("ret_code=%d, error message=%s. 
cmd=%s" % (ret_code, stderr, ' '.join(cmd))) + + return stdout + + +def connect_mellanox_server(): + """ + @summary: Connect to a server on Mellanox lab network via SSH + @return: Returns a paramiko.client.SSHClient object which can be used for running commands + """ + mellanox_server = None + try: + mellanox_server = paramiko.client.SSHClient() + mellanox_server.set_missing_host_key_policy(paramiko.client.AutoAddPolicy()) + mellanox_server.connect("a_mellanox_server", username="username", password="password") + except Exception as e: + logging.debug("Failed to connect to mellanox server, exception: " + repr(e)) + return mellanox_server + + +def find_psu_controller_conf_file(server): + """ + @summary: Find the exact location of the configuration file which contains mapping between PSU controllers and DUT + switches. + @param server: The paramiko.client.SSHClient object connected to a Mellanox server + @return: Returns the exact path of the configuration file + """ + result = None + try: + locations = ("/path1", "/path2") + config_file_name = "psu_controller_configuration_file.txt" + for location in locations: + _, stdout, stderr = server.exec_command("find %s -name %s" % (location, config_file_name)) + + lines = stdout.readlines() + if len(lines) > 0: + result = lines[0].strip() + break + except paramiko.SSHException as e: + logging.debug("Failed to find psu controller configuration file location, exception: " + repr(e)) + return result + + +def get_psu_controller_host(hostname, server, conf_file_location): + """ + @summary: Check the configuration file to find out IP address of the PDU controlling power to PSUs of DUT. 
+ @param hostname: Hostname of the SONiC DUT + @param server: The paramiko.client.SSHClient object connected to a Mellanox server + @param conf_file_location: Exact path of the configuration file on the Mellanox server + @return: Returns IP address of the PDU controlling power to PSUs of DUT + """ + result = None + try: + _, stdout, stderr = server.exec_command("grep %s %s" % (hostname, conf_file_location)) + for line in stdout.readlines(): + fields = line.strip().split() + if len(fields) == 2: + result = fields[1] + break + except paramiko.SSHException as e: + logging.debug("Failed to get psu controller host, exception: " + repr(e)) + return result + + +def get_psu_controller_type(psu_controller_host): + """ + @summary: Use SNMP to get the type of PSU controller host + @param psu_controller_host: IP address of PSU controller host + @return: Returns type string of the specified PSU controller host + """ + result = None + cmd = "snmpget -v 1 -c public -Ofenqv %s .1.3.6.1.2.1.1.1.0" % psu_controller_host + try: + stdout = run_local_cmd(cmd) + + lines = stdout.splitlines() + if len(lines) > 0: + result = lines[0].strip() + result = result.replace('"', '') + except Exception as e: + logging.debug("Failed to get psu controller type, exception: " + repr(e)) + + return result + + +class SentrySwitchedCDU(PsuControllerBase): + """ + PSU Controller class for 'Sentry Switched CDU' + + This class implements the interface defined in PsuControllerBase class for PDU type 'Sentry Switched CDU' + """ + PORT_NAME_BASE_OID = ".1.3.6.1.4.1.1718.3.2.3.1.3.1" + PORT_STATUS_BASE_OID = ".1.3.6.1.4.1.1718.3.2.3.1.5.1" + PORT_CONTROL_BASE_OID = ".1.3.6.1.4.1.1718.3.2.3.1.11.1" + STATUS_ON = "1" + STATUS_OFF = "0" + CONTROL_ON = "1" + CONTROL_OFF = "2" + + def _get_psu_ports(self): + """ + @summary: Helper method for getting PDU ports connected to PSUs of DUT + """ + try: + cmd = "snmpwalk -v 1 -c public -Ofenq %s %s " % (self.controller, self.PORT_NAME_BASE_OID) + stdout = 
run_local_cmd(cmd) + for line in stdout.splitlines(): + if self.hostname in line: + fields = line.split() + if len(fields) == 2: + # Remove the preceeding PORT_NAME_BASE_OID, remaining string is the PDU port ID + self.pdu_ports.append(fields[0].replace(self.PORT_NAME_BASE_OID, '')) + except Exception as e: + logging.debug("Failed to get ports controlling PSUs of DUT, exception: " + repr(e)) + + def __init__(self, hostname, controller): + PsuControllerBase.__init__(self) + self.hostname = hostname + self.controller = controller + self.pdu_ports = [] + self._get_psu_ports() + logging.info("Initialized " + self.__class__.__name__) + + def turn_on_psu(self, psu_id): + """ + @summary: Use SNMP to turn on power to PSU of DUT specified by psu_id + + There is a limitation in the Mellanox configuration. Currently we can just find out which PDU ports are + connected to PSUs of which DUT. But we cannot find out the exact mapping between PDU ports and PSUs of DUT. + + To overcome this limitation, the trick is to convert the specified psu_id to integer, then calculate the mode + upon the number of PSUs on DUT. The calculated mode is used as an index to get PDU ports ID stored in + self.pdu_ports. + + @param psu_id: ID of the PSU on SONiC DUT + @return: Return true if successfully execute the command for turning on power. Otherwise return False. + """ + try: + idx = int(psu_id) % len(self.pdu_ports) + port_oid = self.PORT_CONTROL_BASE_OID + self.pdu_ports[idx] + cmd = "snmpset -v1 -C q -c private %s %s i %s" % (self.controller, port_oid, self.CONTROL_ON) + run_local_cmd(cmd) + logging.info("Turned on PSU %s" % str(psu_id)) + return True + except Exception as e: + logging.debug("Failed to turn on PSU %s, exception: %s" % (str(psu_id), repr(e))) + return False + + def turn_off_psu(self, psu_id): + """ + @summary: Use SNMP to turn off power to PSU of DUT specified by psu_id + + There is a limitation in the Mellanox configuration. 
Currently we can just find out which PDU ports are + connected to PSUs of which DUT. But we cannot find out the exact mapping between PDU ports and PSUs of DUT. + + To overcome this limitation, the trick is to convert the specified psu_id to integer, then calculate the mode + upon the number of PSUs on DUT. The calculated mode is used as an index to get PDU ports ID stored in + self.pdu_ports. + + @param psu_id: ID of the PSU on SONiC DUT + @return: Return true if successfully execute the command for turning off power. Otherwise return False. + """ + try: + idx = int(psu_id) % len(self.pdu_ports) + port_oid = self.PORT_CONTROL_BASE_OID + self.pdu_ports[idx] + cmd = "snmpset -v1 -C q -c private %s %s i %s" % (self.controller, port_oid, self.CONTROL_OFF) + run_local_cmd(cmd) + logging.info("Turned off PSU %s" % str(psu_id)) + return True + except Exception as e: + logging.debug("Failed to turn off PSU %s, exception: %s" % (str(psu_id), repr(e))) + return False + + def get_psu_status(self, psu_id=None): + """ + @summary: Use SNMP to get status of PDU ports supplying power to PSUs of DUT + + There is a limitation in the Mellanox configuration. Currently we can just find out which PDU ports are + connected to PSUs of which DUT. But we cannot find out the exact mapping between PDU ports and PSUs of DUT. + + To overcome this limitation, the trick is to convert the specified psu_id to integer, then calculate the mode + upon the number of PSUs on DUT. The calculated mode is used as an index to get PDU ports ID stored in + self.pdu_ports. + + @param psu_id: Optional. If specified, only return status of PDU port connected to specified PSU of DUT. If + omitted, return status of all PDU ports connected to PSUs of DUT. + @return: Return status of PDU ports connected to PSUs of DUT in a list of dictionary. Example result: + [{"psu_id": 0, "psu_on": True}, {"psu_id": 1, "psu_on": True}] + The psu_id in returned result is integer starts from 0. 
+ """ + results = [] + try: + cmd = "snmpwalk -v 1 -c public -Ofenq %s %s " % (self.controller, self.PORT_STATUS_BASE_OID) + stdout = run_local_cmd(cmd) + for line in stdout.splitlines(): + for idx, port in enumerate(self.pdu_ports): + port_oid = self.PORT_STATUS_BASE_OID + port + if port_oid in line: + fields = line.strip().split() + if len(fields) == 2: + status = {"psu_id": idx, "psu_on": True if fields[1] == self.STATUS_ON else False} + results.append(status) + if psu_id is not None: + idx = int(psu_id) % len(self.pdu_ports) + results = results[idx:idx+1] + logging.info("Got PSU status: %s" % str(results)) + except Exception as e: + logging.debug("Failed to get psu status, exception: " + repr(e)) + return results + + def close(self): + pass + + +def make_mellanox_psu_controller(hostname): + """ + @summary: For creating different type of PSU controller based on Mellanox lab configuration. + @param hostname: Hostname of the SONiC DUT + @return: Returns an instance of PSU controller + """ + mellanox_server = connect_mellanox_server() + if not mellanox_server: + return None + + conf_file_location = find_psu_controller_conf_file(mellanox_server) + logging.info("conf_file_location: %s" % conf_file_location) + if not conf_file_location: + return None + + psu_controller_host = get_psu_controller_host(hostname, mellanox_server, conf_file_location) + logging.info("psu_controller_host: %s" % psu_controller_host) + if not psu_controller_host: + return None + + psu_controller_type = get_psu_controller_type(psu_controller_host) + logging.info("psu_controller_type: %s" % psu_controller_type) + if not psu_controller_type: + return None + + if "Sentry Switched CDU" in psu_controller_type: + logging.info("Initializing PSU controller instance") + return SentrySwitchedCDU(hostname, psu_controller_host) diff --git a/tests/platform/mellanox/test_check_sysfs.py b/tests/platform/mellanox/test_check_sysfs.py new file mode 100644 index 00000000000..973d653f6c6 --- /dev/null +++ 
b/tests/platform/mellanox/test_check_sysfs.py @@ -0,0 +1,25 @@ +""" +Check SYSFS + +This script covers the test case 'Check SYSFS' in the SONiC platform test plan: +https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" +import logging + +try: + from platform_fixtures import conn_graph_facts +except ImportError: + import sys + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + parent_folder = os.path.normpath(os.path.join(current_file_dir, "../")) + if parent_folder not in sys.path: + sys.path.append(parent_folder) + from platform_fixtures import conn_graph_facts +from check_sysfs import check_sysfs + + +def test_check_hw_mgmt_sysfs(testbed_devices, conn_graph_facts): + """This test case is to check the symbolic links under /var/run/hw-management + """ + ans_host = testbed_devices["dut"] + check_sysfs(ans_host, conn_graph_facts["device_conn"]) diff --git a/tests/platform/platform_fixtures.py b/tests/platform/platform_fixtures.py new file mode 100644 index 00000000000..0b73940db3f --- /dev/null +++ b/tests/platform/platform_fixtures.py @@ -0,0 +1,11 @@ +import pytest + +@pytest.fixture(scope="module") +def conn_graph_facts(testbed_devices): + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + + base_path = os.path.dirname(os.path.realpath(__file__)) + lab_conn_graph_file = os.path.join(base_path, "../../ansible/files/lab_connection_graph.xml") + conn_graph_facts = localhost.conn_graph_facts(host=dut.hostname, filename=lab_conn_graph_file)['ansible_facts'] + return conn_graph_facts diff --git a/tests/platform/psu_controller.py b/tests/platform/psu_controller.py new file mode 100644 index 00000000000..420b57ef28e --- /dev/null +++ b/tests/platform/psu_controller.py @@ -0,0 +1,113 @@ +""" +Fixture for controlling PSUs of DUT + +This file defines fixture psu_controller which is for controlling PSUs of DUT. 
class PsuControllerBase(object):
    """
    @summary: Base class for PSU controller

    This base class defines the basic interface to be provided by PSU controller. PSU controller implemented by each
    vendor must be a subclass of this base class. Every interface method here raises NotImplementedError, so a
    subclass has to override all of them.
    """
    def __init__(self):
        pass

    def turn_on_psu(self, psu_id):
        """
        @summary: Turn on power for specified PSU.

        @param psu_id: PSU ID, it could be integer or string digit. For example: 0 or '1'
        @return: Returns True if operation is successful. Otherwise, returns False
        """
        raise NotImplementedError

    def turn_off_psu(self, psu_id):
        """
        @summary: Turn off power for specified PSU.

        @param psu_id: PSU ID, it could be integer or string digit. For example: 0 or '1'
        @return: Returns True if operation is successful. Otherwise, returns False
        """
        raise NotImplementedError

    def get_psu_status(self, psu_id=None):
        """
        @summary: Get current power status of PSUs

        @param psu_id: Optional PSU ID, it could be integer or string digit. If no psu_id is specified, power status of
                       all PSUs should be returned
        @return: Returns a list of dictionaries. For example:
                     [{"psu_id": 0, "psu_on": True}, {"psu_id": 1, "psu_on": True}]
                 If getting PSU status failed, an empty list should be returned.
        """
        raise NotImplementedError

    def close(self):
        """
        @summary: Close the PDU controller to release resources.
        """
        raise NotImplementedError
def test_show_platform_summary(testbed_devices):
    """
    @summary: Check output of 'show platform summary'

    Every output line must split on ':' into exactly a field name and a non-empty value, and the set of
    field names must be exactly Platform, HwSKU and ASIC.
    """
    dut = testbed_devices["dut"]

    logging.info("Check output of '%s'" % CMD_PLATFORM_SUMMARY)
    summary = dut.command(CMD_PLATFORM_SUMMARY)

    wanted = set(["Platform", "HwSKU", "ASIC"])
    seen = set()
    for output_line in summary["stdout_lines"]:
        parts = output_line.split(":")
        assert len(parts) == 2, "output format is not 'field_name: field_value'"
        name, value = parts
        assert len(value) > 0, "No value for field %s" % name
        seen.add(name)

    assert seen == wanted, \
        "Unexpected output fields, actual=%s, expected=%s" % (str(seen), str(wanted))
platform psustatus' + """ + ans_host = testbed_devices["dut"] + + logging.info("Check PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, ans_host.hostname)) + psu_status = ans_host.command(CMD_PLATFORM_PSUSTATUS) + psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK)") + for line in psu_status["stdout_lines"][2:]: + assert psu_line_pattern.match(line), "Unexpected PSU status output" + + +def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): + """ + @summary: Turn off/on PSU and check PSU status using 'show platform psustatus' + """ + ans_host = testbed_devices["dut"] + + psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") + cmd_num_psu = "sudo psuutil numpsus" + + logging.info("Check whether the DUT has enough PSUs for this testing") + psu_num_out = ans_host.command(cmd_num_psu) + psu_num = 0 + try: + psu_num = int(psu_num_out["stdout"]) + except: + assert False, "Unable to get the number of PSUs using command '%s'" % cmd_num_psu + if psu_num < 2: + pytest.skip("At least 2 PSUs required for rest of the testing in this case") + + logging.info("Create PSU controller for testing") + psu_ctrl = psu_controller(ans_host.hostname, ans_host.facts["asic_type"]) + if psu_ctrl is None: + pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % ans_host.hostname) + + logging.info("To avoid DUT losing power, need to turn on PSUs that are not powered") + all_psu_status = psu_ctrl.get_psu_status() + if all_psu_status: + for psu in all_psu_status: + if not psu["psu_on"]: + psu_ctrl.turn_on_psu(psu["psu_id"]) + time.sleep(5) + + logging.info("Initialize test results") + cli_psu_status = ans_host.command(CMD_PLATFORM_PSUSTATUS) + psu_test_results = {} + for line in cli_psu_status["stdout_lines"][2:]: + fields = line.split() + psu_test_results[fields[1]] = False + if " ".join(fields[2:]) == "NOT OK": + pytest.skip("Some PSUs are still not powered, it is not safe to proceed, skip testing") + assert 
def parse_platform_summary(raw_input_lines):
    """
    @summary: Helper function turning 'name: value' lines into a dictionary
    @param raw_input_lines: Iterable of output lines
    @return: Dictionary mapping the lower-cased text before the colon to the stripped text after it.
             Lines that do not contain exactly one ':' are ignored; a repeated name keeps the last value.
    """
    split_lines = (entry.split(":") for entry in raw_input_lines)
    return {parts[0].lower(): parts[1].strip() for parts in split_lines if len(parts) == 2}
CMD_PLATFORM_SYSEEPROM + if ans_host.facts["asic_type"] in ["mellanox"]: + expected_fields = [ + "Product Name", + "Part Number", + "Serial Number", + "Base MAC Address", + "Manufacture Date", + "Device Version", + "MAC Addresses", + "Manufacturer", + "Vendor Extension", + "ONIE Version", + "CRC-32"] + utility_cmd = "sudo python -c \"import imp; \ + m = imp.load_source('eeprom', '/usr/share/sonic/device/%s/plugins/eeprom.py'); \ + t = m.board('board', '', '', ''); e = t.read_eeprom(); t.decode_eeprom(e)\"" % ans_host.facts["platform"] + utility_cmd_output = ans_host.command(utility_cmd) + + for field in expected_fields: + assert show_output["stdout"].find(field) >= 0, "Expected field %s is not found" % field + assert utility_cmd_output["stdout"].find(field) >= 0, "Expected field %s is not found" % field + + for line in utility_cmd_output["stdout_lines"]: + assert line in show_output["stdout"], \ + "Line %s is not found in output of '%s'" % (line, CMD_PLATFORM_SYSEEPROM) diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py new file mode 100644 index 00000000000..8278b96c42a --- /dev/null +++ b/tests/platform/test_reboot.py @@ -0,0 +1,115 @@ +""" +Check platform status after reboot. 
def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"):
    """
    Perform the specified type of reboot and check platform status.

    Steps: issue the reboot command asynchronously on the DUT, wait for port 22 to stop and then to start
    again, then check critical services, transceiver detection, interface status and transceiver status.
    When the DUT asic type is mellanox, sysfs is checked as well.

    @param localhost: Host object used for the wait_for calls
    @param dut: Host object of the DUT
    @param interfaces: Interface information passed through to the check helpers
    @param reboot_type: One of "cold", "fast" or "warm"; any other value fails an assertion
    """
    logging.info("Run %s reboot on DUT" % reboot_type)

    # (reboot type, command to run on the DUT, seconds to wait for the DUT to come back)
    supported_reboots = (
        ("cold", "reboot", 300),
        ("fast", "fast-reboot", 180),
        ("warm", "warm-reboot", 180),
    )
    for known_type, command, timeout in supported_reboots:
        if reboot_type == known_type:
            reboot_cmd = command
            reboot_timeout = timeout
            break
    else:
        assert False, "Reboot type %s is not supported" % reboot_type

    process, queue = dut.command(reboot_cmd, module_async=True)

    logging.info("Wait for DUT to go down")
    down_result = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120,
                                     module_ignore_errors=True)
    if "failed" in down_result:
        # The DUT never dropped its SSH port: stop the pending command, log its result, fail the test
        if process.is_alive():
            logging.error("Command '%s' is not completed" % reboot_cmd)
            process.terminate()
        logging.error("reboot result %s" % str(queue.get()))
        assert False, "DUT did not go down"

    logging.info("Wait for DUT to come back")
    localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout)

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    transceivers_ready = wait_until(300, 20, all_transceivers_detected, dut, interfaces)
    assert transceivers_ready, "Not all transceivers are detected in 300 seconds"

    logging.info("Check interface status")
    check_interface_status(dut, interfaces)

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] not in ["mellanox"]:
        return

    # The sysfs check lives in the "mellanox" sub folder next to this file; make it importable first
    mellanox_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "mellanox")
    if mellanox_dir not in sys.path:
        sys.path.append(mellanox_dir)
    from check_sysfs import check_sysfs

    logging.info("Check sysfs")
    check_sysfs(dut, interfaces)
def test_reload_configuration(testbed_devices, conn_graph_facts):
    """
    @summary: Reload the configuration on the DUT and check platform status afterwards

    After 'sudo config reload -y' the test checks critical services, transceiver detection, interface status
    (after a 60 second pause) and transceiver status. On a mellanox asic type, sysfs is checked as well.
    """
    dut = testbed_devices["dut"]
    dut_interfaces = conn_graph_facts["device_conn"]
    dut_asic = dut.facts["asic_type"]

    logging.info("Reload configuration")
    dut.command("sudo config reload -y")

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    transceivers_ready = wait_until(300, 20, all_transceivers_detected, dut, dut_interfaces)
    assert transceivers_ready, "Not all transceivers are detected in 300 seconds"

    logging.info("Check interface status")
    time.sleep(60)
    check_interface_status(dut, dut_interfaces)

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, dut_interfaces)

    if dut_asic not in ["mellanox"]:
        return

    # The sysfs check lives in the "mellanox" sub folder next to this file; make it importable first
    mellanox_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "mellanox")
    if mellanox_dir not in sys.path:
        sys.path.append(mellanox_dir)
    from check_sysfs import check_sysfs

    logging.info("Check sysfs")
    check_sysfs(dut, dut_interfaces)
def restart_service_and_check(localhost, dut, service, interfaces):
    """
    Restart specified service and check platform status

    @param localhost: Accepted for the callers' convenience; not used inside this function
    @param dut: Host object of the DUT
    @param service: Name of the systemd service to restart
    @param interfaces: Interface information passed through to the check helpers
    """
    logging.info("Restart the %s service" % service)
    dut.command("sudo systemctl restart %s" % service)

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    transceivers_ready = wait_until(300, 20, all_transceivers_detected, dut, interfaces)
    assert transceivers_ready, "Not all transceivers are detected in 300 seconds"

    logging.info("Check interface status")
    time.sleep(60)
    check_interface_status(dut, interfaces)

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] not in ["mellanox"]:
        return

    # The sysfs check lives in the "mellanox" sub folder next to this file; make it importable first
    mellanox_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "mellanox")
    if mellanox_dir not in sys.path:
        sys.path.append(mellanox_dir)
    from check_sysfs import check_sysfs

    logging.info("Check sysfs")
    check_sysfs(dut, interfaces)
test_restart_swss(testbed_devices, conn_graph_facts): + """ + @summary: This test case is to restart the swss service and check platform status + """ + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + restart_service_and_check(localhost, dut, "swss", conn_graph_facts["device_conn"]) + + +@pytest.mark.skip(reason="Restarting syncd is not supported yet") +def test_restart_syncd(testbed_devices, conn_graph_facts): + """ + @summary: This test case is to restart the syncd service and check platform status + """ + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + restart_service_and_check(localhost, dut, "syncd", conn_graph_facts["device_conn"]) diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py new file mode 100644 index 00000000000..bba52ad5473 --- /dev/null +++ b/tests/platform/test_sfp.py @@ -0,0 +1,166 @@ +""" +Check SFP status and configure SFP + +This script covers test case 'Check SFP status and configure SFP' in the SONiC platform test plan: +https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" +import logging +import re +import os +import time +import copy + +import pytest + +from platform_fixtures import conn_graph_facts + + +def parse_output(output_lines): + """ + @summary: For parsing command output. The output lines should have format 'key value'. 
def parse_eeprom(output_lines):
    """
    @summary: Parse the SFP eeprom information from command output
    @param output_lines: Command output lines
    @return: Returns result in a dictionary. Only lines that start with 'Ethernet<N>: ' are used; the key is the
             interface name and the value is the stripped remainder of the line.
    """
    res = {}
    for line in output_lines:
        if re.match(r"^Ethernet\d+: .*", line):
            # Split on the first colon only, so a value that itself contains ':' is kept whole
            # instead of being cut off at its first colon.
            intf, value = line.split(":", 1)
            res[intf] = value.strip()
    return res
parsed_presence, "Interface is not in output of '%s'" % cmd_xcvr_presence + assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + + logging.info("Check output of '%s'" % cmd_sfp_eeprom) + sfp_eeprom = ans_host.command(cmd_sfp_eeprom) + parsed_eeprom = parse_eeprom(sfp_eeprom["stdout_lines"]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_eeprom, "Interface is not in output of 'sfputil show eeprom'" + assert parsed_eeprom[intf] == "SFP EEPROM detected" + + logging.info("Check output of '%s'" % cmd_xcvr_eeprom) + xcvr_eeprom = ans_host.command(cmd_xcvr_eeprom) + parsed_eeprom = parse_eeprom(xcvr_eeprom["stdout_lines"]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_eeprom, "Interface is not in output of '%s'" % cmd_xcvr_eeprom + assert parsed_eeprom[intf] == "SFP EEPROM detected" + + logging.info("Test '%s '" % cmd_sfp_reset) + for intf in conn_graph_facts["device_conn"]: + reset_result = ans_host.command("%s %s" % (cmd_sfp_reset, intf)) + assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf) + time.sleep(120) # Wait some time for SFP to fully recover after reset + + logging.info("Check sfp presence again after reset") + sfp_presence = ans_host.command(cmd_sfp_presence) + parsed_presence = parse_output(sfp_presence["stdout_lines"][2:]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence + assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + + +def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): + """ + @summary: Check SFP low power mode + + This case is to use the sfputil tool command to check and set SFP low power mode + * sfputil show lpmode + * sfputil lpmode off + * sfputil lpmode on + """ + ans_host = testbed_devices["dut"] + + cmd_sfp_presence = "sudo sfputil show presence" + cmd_sfp_show_lpmode = "sudo sfputil show lpmode" + 
cmd_sfp_set_lpmode = "sudo sfputil lpmode" + + logging.info("Check output of '%s'" % cmd_sfp_show_lpmode) + lpmode_show = ans_host.command(cmd_sfp_show_lpmode) + parsed_lpmode = parse_output(lpmode_show["stdout_lines"][2:]) + original_lpmode = copy.deepcopy(parsed_lpmode) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_lpmode, "Interface is not in output of '%s'" % cmd_sfp_show_lpmode + assert parsed_lpmode[intf].lower() == "on" or parsed_lpmode[intf].lower() == "off", "Unexpected SFP lpmode" + + logging.info("Try to change SFP lpmode") + for intf in conn_graph_facts["device_conn"]: + new_lpmode = "off" if original_lpmode[intf].lower() == "on" else "on" + lpmode_set_result = ans_host.command("%s %s %s" % (cmd_sfp_set_lpmode, new_lpmode, intf)) + assert lpmode_set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, new_lpmode, intf) + time.sleep(10) + + logging.info("Check SFP lower power mode again after changing SFP lpmode") + lpmode_show = ans_host.command(cmd_sfp_show_lpmode) + parsed_lpmode = parse_output(lpmode_show["stdout_lines"][2:]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_lpmode, "Interface is not in output of '%s'" % cmd_sfp_show_lpmode + assert parsed_lpmode[intf].lower() == "on" or parsed_lpmode[intf].lower() == "off", "Unexpected SFP lpmode" + + logging.info("Try to change SFP lpmode") + for intf in conn_graph_facts["device_conn"]: + new_lpmode = original_lpmode[intf].lower() + lpmode_set_result = ans_host.command("%s %s %s" % (cmd_sfp_set_lpmode, new_lpmode, intf)) + assert lpmode_set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, new_lpmode, intf) + time.sleep(10) + + logging.info("Check SFP lower power mode again after changing SFP lpmode") + lpmode_show = ans_host.command(cmd_sfp_show_lpmode) + parsed_lpmode = parse_output(lpmode_show["stdout_lines"][2:]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_lpmode, "Interface is not in output of '%s'" % 
cmd_sfp_show_lpmode + assert parsed_lpmode[intf].lower() == "on" or parsed_lpmode[intf].lower() == "off", "Unexpected SFP lpmode" + + logging.info("Check sfp presence again after setting lpmode") + sfp_presence = ans_host.command(cmd_sfp_presence) + parsed_presence = parse_output(sfp_presence["stdout_lines"][2:]) + for intf in conn_graph_facts["device_conn"]: + assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence + assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" diff --git a/tests/platform/test_xcvr_info_in_db.py b/tests/platform/test_xcvr_info_in_db.py new file mode 100644 index 00000000000..264a0e78859 --- /dev/null +++ b/tests/platform/test_xcvr_info_in_db.py @@ -0,0 +1,22 @@ +""" +Check xcvrd information in DB + +This script is to cover the test case 'Check xcvrd information in DB' in the SONiC platform test plan: +https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" +import logging +import re +import os + +from check_transceiver_status import check_transceiver_status +from platform_fixtures import conn_graph_facts + + +def test_xcvr_info_in_db(testbed_devices, conn_graph_facts): + """ + @summary: This test case is to verify that xcvrd works as expected by checking transceiver information in DB + """ + dut = testbed_devices["dut"] + + logging.info("Check transceiver status") + check_transceiver_status(dut, conn_graph_facts["device_conn"]) diff --git a/tests/ptf_fixtures.py b/tests/ptf_fixtures.py new file mode 100644 index 00000000000..d6a21d341cb --- /dev/null +++ b/tests/ptf_fixtures.py @@ -0,0 +1,67 @@ +"""This module provides ptfadapter fixture to be used by tests to send/receive traffic via PTF ports""" + +import pytest + +from ptfadapter import PtfTestAdapter +from ansible_host import AnsibleHost + +DEFAULT_PTF_NN_PORT = 10900 +DEFAULT_DEVICE_NUM = 0 +ETH_PFX = 'eth' + + +def get_ifaces(netdev_output): + """ parse /proc/net/dev content + :param 
def ptf_runner(host, testdir, testname, platform_dir, params={},
               platform="remote", qlen=0, relax=True, debug_level="info", log_file=None):
    """
    Build a 'ptf' command line from the arguments and run it on the given host from /root.

    :param host: object providing shell(cmd, chdir=...) used to run the command
    :param testdir: value for --test-dir
    :param testname: test to run, placed right after the test dir
    :param platform_dir: value for --platform-dir
    :param params: mapping of test parameters, rendered as key="value" pairs joined by ';' and passed via -t.
                   The default dict is only read, never modified. None is treated as "no parameters".
    :param platform: value for --platform; omitted when falsy
    :param qlen: value for --qlen; omitted when falsy
    :param relax: add --relax when truthy
    :param debug_level: value for --debug; omitted when falsy
    :param log_file: value for --log-file; omitted when falsy
    :return: whatever host.shell returns, so that callers can inspect the outcome of the run
    """
    ptf_test_params = ";".join("{}=\"{}\"".format(k, v) for k, v in (params or {}).items())

    cmd = "ptf --test-dir {} {} --platform-dir {}".format(testdir, testname, platform_dir)
    if qlen:
        cmd += " --qlen={}".format(qlen)
    if platform:
        cmd += " --platform {}".format(platform)
    if ptf_test_params:
        cmd += " -t '{}'".format(ptf_test_params)
    if relax:
        cmd += " --relax"
    if debug_level:
        cmd += " --debug {}".format(debug_level)
    if log_file:
        cmd += " --log-file {}".format(log_file)

    # The result used to be stored in a local and dropped; hand it back to the caller instead.
    return host.shell(cmd, chdir="/root")
+ +Example test case code using PTF adapter: + +```python +import ptf.testutils as testutils +import ptf.mask as mask + +def test_some_traffic(ptfadapter): + pkt = testutils.simple_tcp_packet( + eth_dst=host_facts['ansible_Ethernet0']['macaddress'], + eth_src=ptfadapter.dataplane.get_mac(0, 0), + ip_src='1.1.1.1', + ip_dst='192.168.0.1', + ip_ttl=64, + tcp_sport=1234, + tcp_dport=4321) + + exp_pkt = pkt.copy() + exp_pkt = mask.Mask(exp_pkt) + exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') + exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') + exp_pkt.set_do_not_care_scapy(packet.IP, 'ttl') + exp_pkt.set_do_not_care_scapy(packet.IP, 'chksum') + + testutils.send(ptfadapter, 5, pkt) + testutils.verify_packet_any_port(ptfadapter, exp_pkt, ports=[28, 29, 30, 31]) +``` + +If you have changed interface configuration on PTF host (like MAC address change) or you want to run PTF providing custom parameters you can use ```reinit``` method, e.g.: + +```python +def test_some_traffic(ptfadapter): + ptfadapter.reinit({'qlen': 1000}) + # rest of the test ... 
+``` \ No newline at end of file diff --git a/tests/ptfadapter/__init__.py b/tests/ptfadapter/__init__.py new file mode 100644 index 00000000000..c9eb27c8599 --- /dev/null +++ b/tests/ptfadapter/__init__.py @@ -0,0 +1,3 @@ +from ptfadapter import PtfTestAdapter + +__all__ = ['PtfTestAdapter'] diff --git a/tests/ptfadapter/ptfadapter.py b/tests/ptfadapter/ptfadapter.py new file mode 100644 index 00000000000..1c7c7b25953 --- /dev/null +++ b/tests/ptfadapter/ptfadapter.py @@ -0,0 +1,91 @@ +import ptf +from ptf.base_tests import BaseTest +from ptf.dataplane import DataPlane +import ptf.platforms.nn as nn +import ptf.ptfutils as ptfutils + + +class PtfTestAdapter(BaseTest): + """PtfTestAdapater class provides interface for pytest to use ptf.testutils functions """ + + DEFAULT_PTF_QUEUE_LEN = 100000 + DEFAULT_PTF_TIMEOUT = 2 + DEFAULT_PTF_NEG_TIMEOUT = 0.1 + + def __init__(self, ptf_ip, ptf_nn_port, device_num, ptf_ports_num): + """ initialize PtfTestAdapter + :param ptf_ip: PTF host IP + :param ptf_nn_port: PTF nanomessage agent port + :param device_num: device number + :param ptf_ports_num: PTF ports count + :return: + """ + self.runTest = lambda : None # set a no op runTest attribute to satisfy BaseTest interface + super(PtfTestAdapter, self).__init__() + self._init_ptf_dataplane(ptf_ip, ptf_nn_port, device_num, ptf_ports_num) + + def __enter__(self): + """ enter in 'with' block """ + + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ exit from 'with' block """ + + self.kill() + + def _init_ptf_dataplane(self, ptf_ip, ptf_nn_port, device_num, ptf_ports_num, ptf_config=None): + """ + initialize ptf framework and establish connection to ptf_nn_agent + running on PTF host + :param ptf_ip: PTF host IP + :param ptf_nn_port: PTF nanomessage agent port + :param device_num: device number + :param ptf_ports_num: PTF ports count + :return: + """ + self.ptf_ip = ptf_ip + self.ptf_nn_port = ptf_nn_port + self.device_num = device_num + self.ptf_ports_num = 
ptf_ports_num + + ptfutils.default_timeout = self.DEFAULT_PTF_TIMEOUT + ptfutils.default_negative_timeout = self.DEFAULT_PTF_NEG_TIMEOUT + + ptf.config.update({ + 'platform': 'nn', + 'device_sockets': [ + (device_num, range(ptf_ports_num), 'tcp://{}:{}'.format(ptf_ip, ptf_nn_port)) + ], + 'qlen': self.DEFAULT_PTF_QUEUE_LEN, + 'relax': True, + }) + if ptf_config is not None: + ptf.config.update(ptf_config) + + # update ptf.config based on NN platform and create dataplane instance + nn.platform_config_update(ptf.config) + ptf.dataplane_instance = DataPlane(config=ptf.config) + + # TODO: in case of multi PTF hosts topologies we'll have to provide custom platform that supports that + # and initialize port_map specifying mapping between tcp://: and port tuple (device_id, port_id) + for id, ifname in ptf.config['port_map'].items(): + device_id, port_id = id + ptf.dataplane_instance.port_add(ifname, device_id, port_id) + + self.dataplane = ptf.dataplane_instance + + def kill(self): + """ kill data plane thread """ + self.dataplane.kill() + + def reinit(self, ptf_config=None): + """ reinitialize ptf data plane thread. + In case if test changes PTF host network configuration (like MAC change on interfaces) + reinit() method has to be called to restart data plane thread. 
+ Also if test wants to restart PTF data plane specifying non-default PTF configuration + :param ptf_config: PTF configuration dictionary + """ + self.kill() + self._init_ptf_dataplane(self.ptf_ip, self.ptf_nn_port, self.device_num, self.ptf_ports_num, ptf_config) + diff --git a/tests/ptfadapter/templates/ptf_nn_agent.conf.ptf.j2 b/tests/ptfadapter/templates/ptf_nn_agent.conf.ptf.j2 new file mode 100644 index 00000000000..d0a4ffbe5df --- /dev/null +++ b/tests/ptfadapter/templates/ptf_nn_agent.conf.ptf.j2 @@ -0,0 +1,11 @@ +[program:ptf_nn_agent] +command=/usr/bin/python /opt/ptf_nn_agent.py --device-socket {{ device_num }}@tcp://0.0.0.0:{{ ptf_nn_port }} {% for id in ifaces_map -%} -i {{ device_num }}-{{ id }}@{{ ifaces_map[id] }} {% endfor %} + +process_name=ptf_nn_agent +stdout_logfile=/tmp/ptf_nn_agent.out.log +stderr_logfile=/tmp/ptf_nn_agent.err.log +redirect_stderr=false +autostart=true +autorestart=true +startsecs=1 +numprocs=1 \ No newline at end of file diff --git a/tests/ptftests b/tests/ptftests new file mode 100644 index 00000000000..bc19c3dcc38 --- /dev/null +++ b/tests/ptftests @@ -0,0 +1 @@ +../ansible/roles/test/files/ptftests \ No newline at end of file diff --git a/tests/scripts/arp_responder.conf.j2 b/tests/scripts/arp_responder.conf.j2 new file mode 100644 index 00000000000..7d6dcb3062d --- /dev/null +++ b/tests/scripts/arp_responder.conf.j2 @@ -0,0 +1,10 @@ +[program:arp_responder] +command=/usr/bin/python /opt/arp_responder.py {{ arp_responder_args }} +process_name=arp_responder +stdout_logfile=/tmp/arp_responder.out.log +stderr_logfile=/tmp/arp_responder.err.log +redirect_stderr=false +autostart=false +autorestart=true +startsecs=1 +numprocs=1 diff --git a/tests/scripts/arp_responder.py b/tests/scripts/arp_responder.py new file mode 100644 index 00000000000..02e41847f8c --- /dev/null +++ b/tests/scripts/arp_responder.py @@ -0,0 +1,171 @@ +import binascii +import socket +import struct +import select +import json +import argparse +import os.path 
+from fcntl import ioctl +from pprint import pprint + + +def hexdump(data): + print " ".join("%02x" % ord(d) for d in data) + +def get_if(iff, cmd): + s = socket.socket() + ifreq = ioctl(s, cmd, struct.pack("16s16x",iff)) + s.close() + + return ifreq + +def get_mac(iff): + SIOCGIFHWADDR = 0x8927 # Get hardware address + return get_if(iff, SIOCGIFHWADDR)[18:24] + + +class Interface(object): + ETH_P_ALL = 0x03 + RCV_TIMEOUT = 1000 + RCV_SIZE = 4096 + + def __init__(self, iface): + self.iface = iface + self.socket = None + self.mac_address = get_mac(iface) + + def __del__(self): + if self.socket: + self.socket.close() + + def bind(self): + self.socket = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, socket.htons(self.ETH_P_ALL)) + self.socket.bind((self.iface, 0)) + self.socket.settimeout(self.RCV_TIMEOUT) + + def handler(self): + return self.socket.fileno() + + def recv(self): + return self.socket.recv(self.RCV_SIZE) + + def send(self, data): + self.socket.send(data) + + def mac(self): + return self.mac_address + + def name(self): + return self.iface + + +class Poller(object): + def __init__(self, interfaces, responder): + self.responder = responder + self.mapping = {} + for interface in interfaces: + self.mapping[interface.handler()] = interface + + def poll(self): + handlers = self.mapping.keys() + while True: + (rdlist, _, _) = select.select(handlers, [], []) + for handler in rdlist: + self.responder.action(self.mapping[handler]) + + +class ARPResponder(object): + ARP_PKT_LEN = 60 + def __init__(self, ip_sets): + self.arp_chunk = binascii.unhexlify('08060001080006040002') # defines a part of the packet for ARP Reply + self.arp_pad = binascii.unhexlify('00' * 18) + + self.ip_sets = ip_sets + + return + + def action(self, interface): + data = interface.recv() + if len(data) >= self.ARP_PKT_LEN: + return + + remote_mac, remote_ip, request_ip = self.extract_arp_info(data) + + request_ip_str = socket.inet_ntoa(request_ip) + if request_ip_str not in 
self.ip_sets[interface.name()]: + return + + if 'vlan' in self.ip_sets[interface.name()]: + vlan_id = self.ip_sets[interface.name()]['vlan'] + else: + vlan_id = None + + arp_reply = self.generate_arp_reply(self.ip_sets[interface.name()][request_ip_str], remote_mac, request_ip, remote_ip, vlan_id) + interface.send(arp_reply) + + return + + def extract_arp_info(self, data): + return data[6:12], data[28:32], data[38:42] # remote_mac, remote_ip, request_ip + + def generate_arp_reply(self, local_mac, remote_mac, local_ip, remote_ip, vlan_id): + eth_hdr = remote_mac + local_mac + if vlan_id is not None: + eth_type = binascii.unhexlify('8100') + eth_hdr += eth_type + vlan_id + + return eth_hdr + self.arp_chunk + local_mac + local_ip + remote_mac + remote_ip + self.arp_pad + +def parse_args(): + parser = argparse.ArgumentParser(description='ARP autoresponder') + parser.add_argument('--conf', '-c', type=str, dest='conf', default='/tmp/from_t1.json', help='path to json file with configuration') + parser.add_argument('--extended', '-e', action='store_true', dest='extended', default=False, help='enable extended mode') + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + if not os.path.exists(args.conf): + print "Can't find file %s" % args.conf + return + + with open(args.conf) as fp: + data = json.load(fp) + + # generate ip_sets. 
every ip address will have its own unique mac address + ip_sets = {} + counter = 0 + for iface, ip_dict in data.items(): + vlan = None + if iface.find('@') != -1: + iface, vlan = iface.split('@') + vlan_tag = format(int(vlan), 'x') + vlan_tag = vlan_tag.zfill(4) + ip_sets[str(iface)] = {} + if args.extended: + for ip, mac in ip_dict.items(): + ip_sets[str(iface)][str(ip)] = binascii.unhexlify(str(mac)) + counter += 1 + else: + for ip in ip_dict: + ip_sets[str(iface)][str(ip)] = get_mac(str(iface)) + if vlan is not None: + ip_sets[str(iface)]['vlan'] = binascii.unhexlify(vlan_tag) + + ifaces = [] + for iface_name in ip_sets.keys(): + iface = Interface(iface_name) + iface.bind() + ifaces.append(iface) + + resp = ARPResponder(ip_sets) + + p = Poller(ifaces, resp) + p.poll() + + return + +if __name__ == '__main__': + main() diff --git a/tests/scripts/change_mac.sh b/tests/scripts/change_mac.sh new file mode 100644 index 00000000000..7845f3f2395 --- /dev/null +++ b/tests/scripts/change_mac.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e + +for INTF in $(ip -br link show | grep 'eth' | awk '{sub(/@.*/,"",$1); print $1}'); do + ADDR="$(ip -br link show dev ${INTF} | awk '{print $3}')" + PREFIX="$(cut -c1-15 <<< ${ADDR})" + SUFFIX="$(printf "%02x" ${INTF##eth})" + MAC="${PREFIX}${SUFFIX}" + + echo "Update ${INTF} MAC address: ${ADDR}->$MAC" + ip link set dev ${INTF} address ${MAC} +done diff --git a/tests/scripts/remove_ip.sh b/tests/scripts/remove_ip.sh new file mode 100644 index 00000000000..5ccc4b3a066 --- /dev/null +++ b/tests/scripts/remove_ip.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e + +for i in `cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}'`; do + ip address flush dev eth$i +done diff --git a/tests/setup.cfg b/tests/setup.cfg new file mode 100644 index 00000000000..6d6d72b613d --- /dev/null +++ b/tests/setup.cfg @@ -0,0 +1,2 @@ +[tool:pytest] +norecursedirs = ptftests diff --git a/tests/test_bgp_fact.py b/tests/test_bgp_fact.py new file mode 100644 index 
00000000000..5570b12f130 --- /dev/null +++ b/tests/test_bgp_fact.py @@ -0,0 +1,22 @@ +from ansible_host import AnsibleHost + +def test_bgp_facts(ansible_adhoc, testbed): + """compare the bgp facts between observed states and target state""" + + hostname = testbed['dut'] + ans_host = AnsibleHost(ansible_adhoc, hostname) + + bgp_facts = ans_host.bgp_facts()['ansible_facts'] + mg_facts = ans_host.minigraph_facts(host=hostname)['ansible_facts'] + + for k, v in bgp_facts['bgp_neighbors'].items(): + # Verify bgp sessions are established + assert v['state'] == 'established' + # Verify locat ASNs in bgp sessions + assert v['local AS'] == mg_facts['minigraph_bgp_asn'] + + for v in mg_facts['minigraph_bgp']: + # Compare the bgp neighbors name with minigraph bgp neigbhors name + assert v['name'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['description'] + # Compare the bgp neighbors ASN with minigraph + assert v['asn'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['remote AS'] diff --git a/tests/test_bgp_speaker.py b/tests/test_bgp_speaker.py new file mode 100644 index 00000000000..7e72b4183a2 --- /dev/null +++ b/tests/test_bgp_speaker.py @@ -0,0 +1,156 @@ +import pytest +from netaddr import * +import sys +import time +import ipaddress +from ansible_host import AnsibleHost +from ptf_runner import ptf_runner + +def generate_ips(num, prefix, exclude_ips): + """ + Generate random ips within prefix + """ + prefix = IPNetwork(prefix) + exclude_ips.append(prefix.broadcast) + exclude_ips.append(prefix.network) + available_ips = list(prefix) + + if len(available_ips) - len(exclude_ips)< num: + raise Exception("Not enough available IPs") + + generated_ips = [] + for available_ip in available_ips: + if available_ip not in exclude_ips: + generated_ips.append(IPNetwork(str(available_ip) + '/' + str(prefix.prefixlen))) + if len(generated_ips) == num: + break + + return generated_ips + +@pytest.mark.skip(reason='test is broken') +def test_bgp_speaker(localhost, ansible_adhoc, 
testbed): + """setup bgp speaker on T0 topology and verify routes advertised + by bgp speaker is received by T0 TOR + """ + + hostname = testbed['dut'] + ptf_hostname = testbed['ptf'] + host = AnsibleHost(ansible_adhoc, hostname) + ptfhost = AnsibleHost(ansible_adhoc, ptf_hostname) + + mg_facts = host.minigraph_facts(host=hostname)['ansible_facts'] + host_facts = host.setup()['ansible_facts'] + + res = host.shell("sonic-cfggen -m -d -y /etc/sonic/deployment_id_asn_map.yml -v \"deployment_id_asn_map[DEVICE_METADATA['localhost']['deployment_id']]\"") + bgp_speaker_asn = res['stdout'] + + vlan_ips = generate_ips(3, \ + "%s/%s" % (mg_facts['minigraph_vlan_interfaces'][0]['addr'], mg_facts['minigraph_vlan_interfaces'][0]['prefixlen']), + [IPAddress(mg_facts['minigraph_vlan_interfaces'][0]['addr'])]) + + # three speaker ips, two from peer range, another is vlan ip [0] + speaker_ips = generate_ips(2, mg_facts['minigraph_bgp_peers_with_range'][0]['ip_range'][0], []) + speaker_ips.append(vlan_ips[0]) + + for ip in vlan_ips: + host.command("ip route flush %s/32" % ip.ip) + host.command("ip route add %s/32 dev %s" % (ip.ip, mg_facts['minigraph_vlan_interfaces'][0]['attachto'])) + + root_dir = "/root" + exabgp_dir = "/root/exabgp" + helper_dir = "/root/helpers" + port_num = [5000, 6000, 7000] + cfnames = ["config_1.ini", "config_2.ini", "config_3.ini"] + vlan_ports = [] + for i in range(0, 3): + vlan_ports.append(mg_facts['minigraph_port_indices'][mg_facts['minigraph_vlans'][mg_facts['minigraph_vlan_interfaces'][0]['attachto']]['members'][i]]) + + ptfhost.file(path=exabgp_dir, state="directory") + ptfhost.file(path=helper_dir, state="directory") + ptfhost.copy(src="bgp_speaker/dump.py", dest=helper_dir) + ptfhost.copy(src="bgp_speaker/http_api.py", dest=helper_dir) + ptfhost.copy(src="bgp_speaker/announce_routes.py", dest=helper_dir) + + # deploy config file + extra_vars = \ + { 'helper_dir': helper_dir, + 'exabgp_dir': exabgp_dir, + 'lo_addr' : 
mg_facts['minigraph_lo_interfaces'][0]['addr'], + 'lo_addr_prefixlen' : mg_facts['minigraph_lo_interfaces'][0]['prefixlen'], + 'vlan_addr' : mg_facts['minigraph_vlan_interfaces'][0]['addr'], + 'peer_range': mg_facts['minigraph_bgp_peers_with_range'][0]['ip_range'][0], + 'announce_prefix': '10.10.10.0/26', + 'minigraph_portchannels' : mg_facts['minigraph_portchannels'], + 'minigraph_vlans' : mg_facts['minigraph_vlans'], + 'minigraph_port_indices' : mg_facts['minigraph_port_indices'], + 'peer_asn' : mg_facts['minigraph_bgp_asn'], + 'peer_asn' : mg_facts['minigraph_bgp_asn'], + 'my_asn' : bgp_speaker_asn, + 'vlan_ports' : vlan_ports, + 'port_num' : port_num, + 'speaker_ips': [str(ip) for ip in speaker_ips], + 'vlan_ips': [str(ip) for ip in vlan_ips], + 'cfnames': cfnames } + + for i in range(0, 3): + extra_vars.update({ 'cidx':i }) + extra_vars.update({ 'speaker_ip': str(speaker_ips[i].ip) }) + ptfhost.host.options['variable_manager'].extra_vars = extra_vars + ptfhost.template(src="bgp_speaker/config.j2", dest="%s/%s" % (exabgp_dir, cfnames[i])) + + # deploy routes + ptfhost.template(src="bgp_speaker/routes.j2", dest="%s/%s" % (exabgp_dir, "routes")) + + # deploy start script + ptfhost.template(src="bgp_speaker/start.j2", dest="%s/%s" % (exabgp_dir, "start.sh"), mode="u+rwx") + # kill exabgp + res = ptfhost.shell("pkill exabgp || true") + print res + + # start exabgp instance + res = ptfhost.shell("bash %s/start.sh" % exabgp_dir) + print res + + time.sleep(10) + + # announce route + res = ptfhost.shell("nohup python %s/announce_routes.py %s/routes >/dev/null 2>&1 &" % (helper_dir, exabgp_dir)) + print res + + # make sure routes announced to dynamic bgp neighbors + time.sleep(60) + + bgp_facts = host.bgp_facts()['ansible_facts'] + + # Verify bgp sessions are established + for k, v in bgp_facts['bgp_neighbors'].items(): + assert v['state'] == 'established' + + # Verify accepted prefixes of the dynamic neighbors are correct + for ip in speaker_ips: + assert 
bgp_facts['bgp_neighbors'][str(ip.ip)]['accepted prefixes'] == 1 + assert bgp_facts['bgp_neighbors'][str(vlan_ips[0].ip)]['accepted prefixes'] == 1 + + + # Generate route-port map information + ptfhost.template(src="bgp_speaker/bgp_speaker_route.j2", dest="/root/bgp_speaker_route.txt") + + ptfhost.copy(src="ptftests", dest=root_dir) + + ptf_runner(ptfhost, \ + "ptftests", + "fib_test.FibTest", + platform_dir="ptftests", + params={"testbed_type": "t0", + "router_mac": host_facts['ansible_Ethernet0']['macaddress'], + "fib_info": "/root/bgp_speaker_route.txt", + "ipv4": True, + "ipv6": False }, + log_file="/tmp/bgp_speaker_test.FibTest.log") + + res = ptfhost.shell("pkill exabgp || true") + + for ip in vlan_ips: + host.command("ip route flush %s/32" % ip.ip) + + # ptfhost.shell("ip addr flush dev eth{{ '%d' % (minigraph_vlans[minigraph_vlan_interfaces[0]['attachto']]['members'][0] | replace("Ethernet", "") | int / 4)}} diff --git a/tests/test_lldp.py b/tests/test_lldp.py new file mode 100644 index 00000000000..fea40465033 --- /dev/null +++ b/tests/test_lldp.py @@ -0,0 +1,62 @@ +from ansible_host import AnsibleHost + + +def test_lldp(localhost, ansible_adhoc, testbed): + """ verify the LLDP message on DUT """ + + hostname = testbed['dut'] + ans_host = AnsibleHost(ansible_adhoc, hostname) + + mg_facts = ans_host.minigraph_facts(host=hostname)['ansible_facts'] + lldp_facts = ans_host.lldp()['ansible_facts'] + + minigraph_lldp_nei = {} + for k, v in mg_facts['minigraph_neighbors'].items(): + if 'server' not in v['name'].lower(): + minigraph_lldp_nei[k] = v + + # Verify LLDP information is available on most interfaces + assert len(lldp_facts['lldp']) > len(minigraph_lldp_nei) * 0.8 + + for k, v in lldp_facts['lldp'].items(): + if k == 'eth0': + continue + # Compare the LLDP neighbor name with minigraph neigbhor name (exclude the management port) + assert v['chassis']['name'] == minigraph_lldp_nei[k]['name'] + # Compare the LLDP neighbor interface with minigraph neigbhor 
interface (exclude the management port) + assert v['port']['ifname'] == mg_facts['minigraph_neighbors'][k]['port'] + + +def test_lldp_neighbor(localhost, ansible_adhoc, testbed, eos): + """ verify LLDP information on neighbors """ + + hostname = testbed['dut'] + ans_host = AnsibleHost(ansible_adhoc, hostname) + mg_facts = ans_host.minigraph_facts(host=hostname)['ansible_facts'] + res = ans_host.shell("docker exec -i lldp lldpcli show chassis | grep \"SysDescr:\" | sed -e 's/^\\s*SysDescr:\\s*//g'") + dut_system_description = res['stdout'] + lldp_facts = ans_host.lldp()['ansible_facts'] + host_facts = ans_host.setup()['ansible_facts'] + lhost = AnsibleHost(ansible_adhoc, 'localhost', True) + + for k, v in lldp_facts['lldp'].items(): + if k == 'eth0': + # skip test on management interface + continue + + hostip = v['chassis']['mgmt-ip'] + nei_lldp_facts = lhost.lldp_facts(host=hostip, version='v2c', community=eos['snmp_rocommunity'])['ansible_facts'] + print nei_lldp_facts + neighbor_interface = v['port']['ifname'] + # Verify the published DUT system name field is correct + assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_sys_name'] == hostname + # Verify the published DUT chassis id field is not empty + assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_chassis_id'] == \ + "0x%s" % (host_facts['ansible_eth0']['macaddress'].replace(':', '')) + # Verify the published DUT system description field is correct + assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_sys_desc'] == dut_system_description + # Verify the published DUT port id field is correct + assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_port_id'] == mg_facts['minigraph_ports'][k]['alias'] + # Verify the published DUT port description field is correct + assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_port_desc'] == \ + "%s:%s" % (mg_facts['minigraph_neighbors'][k]['name'], 
mg_facts['minigraph_neighbors'][k]['port']) diff --git a/tests/veos.vtb b/tests/veos.vtb new file mode 100644 index 00000000000..54527ea9b88 --- /dev/null +++ b/tests/veos.vtb @@ -0,0 +1,35 @@ +[vm_host_1] +STR-ACS-VSERV-01 ansible_host=172.17.0.1 ansible_user=lgh + +[vm_host:children] +vm_host_1 + +[vms_1] +VM0100 ansible_host=10.250.0.51 +VM0101 ansible_host=10.250.0.52 +VM0102 ansible_host=10.250.0.53 +VM0103 ansible_host=10.250.0.54 + + +[eos:children] +vms_1 + +## The groups below are helper to limit running playbooks to server_1, server_2 or server_3 only +[server_1:children] +vm_host_1 +vms_1 + +[server_1:vars] +host_var_file=host_vars/STR-ACS-VSERV-01.yml + +[servers:children] +server_1 + +[servers:vars] +topologies=['t1', 't1-lag', 't1-64-lag', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] + +[sonic] +vlab-01 ansible_host=10.250.0.101 type=kvm hwsku=Force10-S6000 ansible_password=password ansible_user=admin +vlab-02 ansible_host=10.250.0.102 type=kvm hwsku=Force10-S6100 ansible_password=password + +ptf-01 ansible_host=10.250.0.102 ansible_user=root ansible_password=root diff --git a/tests/vtestbed.csv b/tests/vtestbed.csv new file mode 100644 index 00000000000..dd5549cf297 --- /dev/null +++ b/tests/vtestbed.csv @@ -0,0 +1,3 @@ +# conf-name,group-name,topo,ptf_image_name,ptf,ptf_ip,server,vm_base,dut,comment +vms-kvm-t0,vms6-1,t0,docker-ptf-brcm,ptf-01,10.250.0.102/24,server_1,VM0100,vlab-01,Tests virtual switch vm +vms-kvm-t0-64,vms6-1,t0-64,docker-ptf-brcm,ptf-01,10.250.0.102/24,server_1,VM0100,vlab-02,Tests virtual switch vm From fd6239eccbca998cc0d2b690e1380f79c0bbbc07 Mon Sep 17 00:00:00 2001 From: Sumukha Tumkur Vani Date: Tue, 10 Sep 2019 09:20:58 -0700 Subject: [PATCH 094/218] Merge pull request #1105 from sumukhatv/m0-nightly Adding Celestica E1031 SKU and related changes to testcases (Phase 1) --- ansible/group_vars/sonic/vars | 2 +- ansible/library/minigraph_facts.py | 3 + 
ansible/roles/eos/templates/t0-52-leaf.j2 | 187 +++++++++++++++++- .../test/files/ptftests/IP_decap_test.py | 2 + ansible/roles/test/files/ptftests/fib_test.py | 2 + ansible/roles/test/tasks/crm.yml | 4 +- ansible/roles/test/tasks/decap.yml | 2 +- ansible/roles/test/tasks/fdb.yml | 2 +- ansible/roles/test/tasks/shared-fib.yml | 2 +- ansible/roles/test/templates/fib.j2 | 6 +- ansible/roles/test/vars/testcases.yml | 18 +- 11 files changed, 213 insertions(+), 17 deletions(-) mode change 120000 => 100644 ansible/roles/eos/templates/t0-52-leaf.j2 diff --git a/ansible/group_vars/sonic/vars b/ansible/group_vars/sonic/vars index 86ab98c0e28..cbd819c150b 100644 --- a/ansible/group_vars/sonic/vars +++ b/ansible/group_vars/sonic/vars @@ -2,7 +2,7 @@ ansible_ssh_user: admin sonic_version: "v2" -broadcom_hwskus: [ "Force10-S6000", "Accton-AS7712-32X", "Celestica-DX010-C32", "Seastone-DX010" ] +broadcom_hwskus: [ "Force10-S6000", "Accton-AS7712-32X", "Celestica-DX010-C32", "Seastone-DX010", "Celestica-E1031-T48S4"] mellanox_hwskus: [ 'ACS-MSN2700', 'Mellanox-SN2700', 'ACS-MSN2740', 'ACS-MSN2100', 'ACS-MSN2410', 'ACS-MSN2010' ] diff --git a/ansible/library/minigraph_facts.py b/ansible/library/minigraph_facts.py index 18b28e242a2..434b20d9dd9 100644 --- a/ansible/library/minigraph_facts.py +++ b/ansible/library/minigraph_facts.py @@ -528,6 +528,9 @@ def parse_xml(filename, hostname): elif hwsku == "Seastone-DX010": for i in range(1, 33): port_alias_map["Eth%d" % i] = "Ethernet%d" % ((i - 1) * 4) + elif hwsku == "Celestica-E1031-T48S4": + for i in range(1, 53): + port_alias_map["etp%d" % i] = "Ethernet%d" % ((i - 1)) else: for i in range(0, 128, 4): port_alias_map["Ethernet%d" % i] = "Ethernet%d" % i diff --git a/ansible/roles/eos/templates/t0-52-leaf.j2 b/ansible/roles/eos/templates/t0-52-leaf.j2 deleted file mode 120000 index 8430cb1debd..00000000000 --- a/ansible/roles/eos/templates/t0-52-leaf.j2 +++ /dev/null @@ -1 +0,0 @@ -t0-leaf.j2 \ No newline at end of file diff --git 
a/ansible/roles/eos/templates/t0-52-leaf.j2 b/ansible/roles/eos/templates/t0-52-leaf.j2 new file mode 100644 index 00000000000..0f1050964ff --- /dev/null +++ b/ansible/roles/eos/templates/t0-52-leaf.j2 @@ -0,0 +1,186 @@ +{% set host = configuration[hostname] %} +{% set mgmt_ip = ansible_host %} +no schedule tech-support +! +hostname {{ hostname }} +! +vrf definition MGMT + rd 1:1 +! +spanning-tree mode mstp +! +aaa root secret 0 123456 +! +username admin privilege 15 role network-admin secret 0 123456 +! +clock timezone UTC +! +lldp run +lldp management-address Management1 +lldp management-address vrf MGMT +! +snmp-server community {{ snmp_rocommunity }} ro +snmp-server vrf MGMT +! +ip routing +ip routing vrf MGMT +ipv6 unicast-routing +! +{% if vm_mgmt_gw is defined %} +ip route vrf MGMT 0.0.0.0/0 {{ vm_mgmt_gw }} +{% else %} +ip route vrf MGMT 0.0.0.0/0 {{ mgmt_gw }} +{% endif %} +! +route-map DEFAULT_ROUTES permit +! +{# #} +{# NOTE: Using large enough values (e.g., podset_number = 200, #} +{# us to overflow the 192.168.0.0/16 private address space here. 
#} +{# This should be fine for internal use, but may pose an issue if used otherwise #} +{# #} +{% for podset in range(0, (props.podset_number // 10) | int) %} +{% for tor in range(0, props.tor_number) %} +{% for subnet in range(0, props.tor_subnet_number) %} +{# Skip tor 0 podset 0 #} +{% if podset != 0 or tor != 0 %} +{% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + + (tor * props.max_tor_subnet_number * props.tor_subnet_size) + + (subnet * props.tor_subnet_size) ) %} +{% set octet2 = (168 + (suffix // (256 ** 2))) %} +{% set octet1 = (192 + (octet2 // 256)) %} +{% set octet2 = (octet2 % 256) %} +{% set octet3 = ((suffix // 256) % 256) %} +{% set octet4 = (suffix % 256) %} +{% set prefixlen_v4 = (32 - ((props.tor_subnet_size | log(2))) | int) %} +ip route {{ octet1 }}.{{ octet2 }}.{{ octet3 }}.{{ octet4 }}/{{ prefixlen_v4 }} {{ props.nhipv4 }} +ipv6 route {{ '20%02x' % octet1 }}:{{ '%02X%02X' % (octet2, octet3) }}:0:{{ '%02X' % octet4 }}::/64 {{ props.nhipv6 }} +{% endif %} +{% endfor %} +{% endfor %} +{% endfor %} +! 
+{% for podset in range(0, (props.podset_number // 10) | int) %} +{% for tor in range(0, props.tor_number) %} +{# Skip tor 0 podset 0 #} +{% if podset != 0 or tor != 0 %} +{% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + + (tor * props.max_tor_subnet_number * props.tor_subnet_size) ) %} +{% set octet2 = (168 + (suffix // (256 ** 2))) %} +{% set octet1 = (192 + (octet2 // 256)) %} +{% set octet2 = (octet2 % 256) %} +{% set octet3 = ((suffix // 256) % 256) %} +{% set octet4 = (suffix % 256) %} +{% set prefixlen_v4 = (32 - (((props.max_tor_subnet_number * props.tor_subnet_size) | log(2)) | int) ) %} +{% set prefixlen_v6 = (64 - (((props.max_tor_subnet_number * props.tor_subnet_size) | log(2)) | int) ) %} +ip prefix-list test_ipv4_{{ podset}}_{{ tor }} seq 10 permit {{ octet1 }}.{{ octet2 }}.{{ octet3 }}.{{ octet4 }}/{{ prefixlen_v4 }} ge {{ prefixlen_v4 }} +ipv6 prefix-list test_ipv6_{{ podset}}_{{ tor }} + seq 10 permit {{ '20%02x' % octet1 }}:{{ '%02X%02X' % (octet2, octet3) }}:0:{{ '%02X' % octet4 }}::/{{ prefixlen_v6 }} ge {{ prefixlen_v6 }} +exit +{% endif %} +{% endfor %} +{% endfor %} +! +interface Management 1 + description TO LAB MGMT SWITCH + vrf forwarding MGMT + ip address {{ mgmt_ip }}/{{ mgmt_prefixlen }} + no shutdown +! +{% for name, iface in host['interfaces'].items() %} +interface {{ name }} +{% if name.startswith('Loopback') %} + description LOOPBACK +{% else %} + no switchport +{% endif %} +{% if name.startswith('Port-Channel') %} + port-channel min-links 1 +{% endif %} +{% if iface['lacp'] is defined %} + channel-group {{ iface['lacp'] }} mode active + lacp rate normal +{% endif %} +{% if iface['ipv4'] is defined %} + ip address {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ iface['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +{% endfor %} +! 
+interface {{ bp_ifname }} + description backplane + no switchport +{% if host['bp_interface']['ipv4'] is defined %} + ip address {{ host['bp_interface']['ipv4'] }} +{% endif %} +{% if host['bp_interface']['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ host['bp_interface']['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +{% for podset in range(0, props.podset_number) %} +{% if range(0, 1000)|random() >= props.failure_rate %} +{% for tor in range(0, props.tor_number) %} +{% set leafasn = props.leaf_asn_start + podset %} +{% set torasn = props.tor_asn_start + tor %} +route-map PREPENDAS permit {{ 2 * (podset * props.tor_number + tor + 1) }} + match ip address prefix-list test_ipv4_{{ podset }}_{{ tor }} +{% if podset == 0 %} + set as-path prepend {{ torasn }} +{% else %} + set as-path prepend {{ props.spine_asn }} {{ leafasn }} {{ torasn }} +{% endif %} +! +route-map PREPENDAS permit {{ 2 * (podset * props.tor_number + tor + 1) + 1 }} + match ipv6 address prefix-list test_ipv6_{{ podset }}_{{ tor }} +{% if podset == 0 %} + set as-path prepend {{ torasn }} +{% else %} + set as-path prepend {{ props.spine_asn }} {{ leafasn }} {{ torasn }} +{% endif %} +! +{% endfor %} +{% endif %} +{% endfor %} +! +router bgp {{ host['bgp']['asn'] }} + router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} + ! +{% for asn, remote_ips in host['bgp']['peers'].items() %} +{% for remote_ip in remote_ips %} + neighbor {{ remote_ip }} remote-as {{ asn }} + neighbor {{ remote_ip }} description {{ asn }} + neighbor {{ remote_ip }} default-originate route-map DEFAULT_ROUTES +{% if remote_ip | ipv6 %} + address-family ipv6 + neighbor {{ remote_ip }} activate + exit +{% endif %} +{% endfor %} +{% endfor %} + ! 
+{% for name, iface in host['interfaces'].items() if name.startswith('Loopback') %} +{% if iface['ipv4'] is defined %} + network {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + network {{ iface['ipv6'] }} +{% endif %} +{% endfor %} + redistribute static route-map PREPENDAS +! +management api http-commands + no protocol https + protocol http + no shutdown +! +end +s diff --git a/ansible/roles/test/files/ptftests/IP_decap_test.py b/ansible/roles/test/files/ptftests/IP_decap_test.py index 8b8b67a4173..a7441e5c4aa 100644 --- a/ansible/roles/test/files/ptftests/IP_decap_test.py +++ b/ansible/roles/test/files/ptftests/IP_decap_test.py @@ -99,6 +99,8 @@ def setUp(self): self.src_ports = [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 48, 52, 53, 54, 55, 56, 57, 58] if self.test_params['testbed_type'] == 't0-116': self.src_ports = range(0, 24) + range(32, 120) + if self.test_params['testbed_type'] == 't0-52': + self.src_ports = range(0, 52) # which type of tunneled trafic to test (IPv4 in IPv4, IPv6 in IPv4, IPv6 in IPv4, IPv6 in IPv6) self.test_outer_ipv4 = self.test_params.get('outer_ipv4', True) diff --git a/ansible/roles/test/files/ptftests/fib_test.py b/ansible/roles/test/files/ptftests/fib_test.py index a53e572772a..d4f7fbfe7e4 100644 --- a/ansible/roles/test/files/ptftests/fib_test.py +++ b/ansible/roles/test/files/ptftests/fib_test.py @@ -101,6 +101,8 @@ def setUp(self): self.src_ports = [0, 1, 4, 5, 16, 17, 20, 21, 34, 36, 37, 38, 39, 42, 44, 45, 46, 47, 50, 52, 53, 54, 55, 58, 60, 61, 62, 63] if self.test_params['testbed_type'] == 't0': self.src_ports = range(1, 25) + range(28, 32) + if self.test_params['testbed_type'] == 't0-52': + self.src_ports = range(0, 52) if self.test_params['testbed_type'] == 't0-56': self.src_ports = [0, 1, 4, 5, 8, 9] + range(12, 18) + [20, 21, 24, 25, 28, 29, 32, 33, 36, 37] + range(40, 46) + [48, 49, 52, 53] if 
self.test_params['testbed_type'] == 't0-64': diff --git a/ansible/roles/test/tasks/crm.yml b/ansible/roles/test/tasks/crm.yml index a502dcd19c1..de17d369674 100644 --- a/ansible/roles/test/tasks/crm.yml +++ b/ansible/roles/test/tasks/crm.yml @@ -4,7 +4,7 @@ when: (testbed_type is not defined) - fail: msg="Invalid testbed_type value '{{testbed_type}}'" - when: testbed_type not in ['t1', 't1-lag', 't0', 't0-56', 't0-64', 't0-116'] + when: testbed_type not in ['t1', 't1-lag', 't0', 't0-52', 't0-56', 't0-64', 't0-116'] - set_fact: crm_intf="{{minigraph_interfaces[0].attachto}}" crm_intf1="{{minigraph_interfaces[2].attachto}}" @@ -12,7 +12,7 @@ - set_fact: crm_intf="{{minigraph_portchannel_interfaces[0].attachto}}" crm_intf1="{{minigraph_portchannel_interfaces[2].attachto}}" - when: testbed_type in ['t0', 't1-lag', 't0-56', 't0-64', 't0-116'] + when: testbed_type in ['t0', 't1-lag', 't0-52', 't0-56', 't0-64', 't0-116'] - set_fact: ansible_date_time: "{{ansible_date_time}}" diff --git a/ansible/roles/test/tasks/decap.yml b/ansible/roles/test/tasks/decap.yml index ce08e508335..cf93cf5014f 100644 --- a/ansible/roles/test/tasks/decap.yml +++ b/ansible/roles/test/tasks/decap.yml @@ -43,7 +43,7 @@ - name: Expand properties into props set_fact: props="{{configuration_properties['common']}}" - when: testbed_type in ['t0', 't0-64', 't0-116'] + when: testbed_type in ['t0', 't0-52', 't0-64', 't0-116'] - name: Expand properties into props set_fact: props_tor="{{configuration_properties['tor']}}" diff --git a/ansible/roles/test/tasks/fdb.yml b/ansible/roles/test/tasks/fdb.yml index a7d2c8eed51..b4c0b8c95b6 100644 --- a/ansible/roles/test/tasks/fdb.yml +++ b/ansible/roles/test/tasks/fdb.yml @@ -2,7 +2,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{test_type}} is invalid" - when: testbed_type not in ['t0', 't0-64', 't0-116'] + when: testbed_type not in ['t0', 't0-64', 't0-116', 't0-52'] - include_vars: "vars/topo_{{testbed_type}}.yml" diff --git 
a/ansible/roles/test/tasks/shared-fib.yml b/ansible/roles/test/tasks/shared-fib.yml index 87557248ea5..4bd1fac2b2c 100644 --- a/ansible/roles/test/tasks/shared-fib.yml +++ b/ansible/roles/test/tasks/shared-fib.yml @@ -18,7 +18,7 @@ - name: Expand properties into props set_fact: props="{{configuration_properties['common']}}" - when: testbed_type in ['t0', 't0-56', 't0-64', 't0-116'] + when: testbed_type in ['t0', 't0-52', 't0-56', 't0-64', 't0-64-32', 't0-116'] - name: Expand ToR properties into props set_fact: props_tor="{{configuration_properties['tor']}}" diff --git a/ansible/roles/test/templates/fib.j2 b/ansible/roles/test/templates/fib.j2 index 38720ce5213..88d725ca75d 100644 --- a/ansible/roles/test/templates/fib.j2 +++ b/ansible/roles/test/templates/fib.j2 @@ -1,7 +1,7 @@ {# defualt route#} {% if testbed_type == 't1' %} 0.0.0.0/0 {% for ifname, v in minigraph_neighbors.iteritems() %}{% if "T2" in v.name %}{{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %}{% endif %}{% endfor %} -{% elif testbed_type == 't0' or testbed_type == 't0-64' or testbed_type == 't1-lag' %} +{% elif testbed_type == 't0' or testbed_type == 't0-52'or testbed_type == 't0-64' or testbed_type == 't1-lag' or testbed_type == 't0-64-32' %} 0.0.0.0/0 {% for portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} {% elif testbed_type == 't1-64-lag' %} @@ -31,8 +31,12 @@ 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 [0 1] [4 5] [16 17] [20 21] 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 [0 1] [4 5] [16 17] [20 21] +<<<<<<< HEAD {% elif testbed_type == 't0' or testbed_type == 't0-64' %} +======= +{% elif testbed_type == 't0' or testbed_type == 't0-52' or testbed_type == 't0-64' or testbed_type == 't0-64-32' %} +>>>>>>> 8b2905c... 
Merge pull request #1105 from sumukhatv/m0-nightly {% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + (tor * props.max_tor_subnet_number * props.tor_subnet_size) + (subnet * props.tor_subnet_size) ) %} diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index d179e41f8bd..69d0723b6ae 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -14,7 +14,7 @@ testcases: bgp_fact: filename: bgp_fact.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] bgp_gr_helper: filename: bgp_gr_helper.yml @@ -50,7 +50,7 @@ testcases: decap: filename: decap.yml - topologies: [t1, t1-lag, t1-64-lag, t0, t0-56, t0-64, t0-116] + topologies: [t1, t1-lag, t1-64-lag, t0, t0-52, t0-56, t0-64, t0-116] required_vars: ptf_host: testbed_type: @@ -58,7 +58,7 @@ testcases: dhcp_relay: filename: dhcp_relay.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: @@ -103,7 +103,7 @@ testcases: fib: filename: simple-fib.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] required_vars: ptf_host: testbed_type: @@ -117,7 +117,7 @@ testcases: fdb: filename: fdb.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: testbed_type: @@ -139,18 +139,18 @@ testcases: lag_2: filename: lag_2.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag] required_vars: ptf_host: testbed_type: lldp: filename: lldp.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-116, 
t0-64-32, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-116, t0-64-32, t1, t1-lag, t1-64-lag] link_flap: filename: link_flap.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] mem_check: filename: mem_check.yml @@ -234,7 +234,7 @@ testcases: crm: filename: crm.yml - topologies: [t1, t1-lag, t0, t0-56, t0-64, t0-116] + topologies: [t1, t1-lag, t0, t0-52, t0-56, t0-64, t0-116] dip_sip: filename: dip_sip.yml From 9ddd92eac47f82cf6ce796c43300dbba8e22626c Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 10 Sep 2019 12:33:25 -0700 Subject: [PATCH 095/218] Fix trying to access match group when regex match failed (#1095) Signed-off-by: Neetha John --- ansible/roles/test/files/ptftests/sad_path.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index bf722d917f5..11852ea7822 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -354,14 +354,15 @@ def verify_dut_lag_state(self, pre_check=True): stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces portchannel']) if return_code == 0: for line in stdout.split('\n'): - if any(po_name in line for po_name in po_list): - is_match = pat.match(line) - if is_match and self.verify_dut_lag_member_state(is_match, pre_check=pre_check): - self.log.append('Lag state is down as expected on the DUT for %s' % is_match.group(1)) - self.log.append('Pattern check: %s' % line) - else: - self.fails['dut'].add('%s: Lag state is not down on the DUT for %s' % (self.msg_prefix[pre_check], is_match.group(1))) - self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line)) + for po_name in po_list: + if po_name in 
line: + is_match = pat.match(line) + if is_match and self.verify_dut_lag_member_state(is_match, pre_check=pre_check): + self.log.append('Lag state is down as expected on the DUT for %s' % po_name) + self.log.append('Pattern check: %s' % line) + else: + self.fails['dut'].add('%s: Lag state is not down on the DUT for %s' % (self.msg_prefix[pre_check], po_name)) + self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line)) else: self.fails['dut'].add('%s: Retreiving LAG info from DUT side failed' % self.msg_prefix[pre_check]) self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code)) From 4cb5653ae49043529f3becaf8caa81505e9c43fd Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 10 Sep 2019 13:39:36 -0700 Subject: [PATCH 096/218] Preboot sad path automation for n vlan ports (#1096) Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 11 +- ansible/roles/test/files/ptftests/sad_path.py | 100 +++++++++++++++--- ansible/roles/test/tasks/advanced-reboot.yml | 18 ---- .../advanced_reboot/validate_preboot_list.yml | 4 + .../roles/test/tasks/ptf_runner_reboot.yml | 27 ++++- .../test/tasks/warm-reboot-multi-sad.yml | 2 +- ansible/roles/test/tasks/warm-reboot-sad.yml | 2 +- 7 files changed, 127 insertions(+), 37 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 9a015aa09ce..fe93e9e931f 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -349,6 +349,12 @@ def build_peer_mapping(self): self.get_neigh_port_info() self.get_portchannel_info() + def build_vlan_if_port_mapping(self): + content = self.read_json('vlan_ports_file') + if len(content) > 1: + raise Exception("Too many vlans") + return [(ifname, self.port_indices[ifname]) for ifname in content.values()[0]['members']] + def populate_fail_info(self, fails): for key in fails: if key not in 
self.fails: @@ -386,6 +392,7 @@ def setUp(self): self.vlan_ports = self.read_vlan_ports() if self.test_params['preboot_oper'] is not None: self.build_peer_mapping() + self.test_params['vlan_if_port'] = self.build_vlan_if_port_mapping() self.vlan_ip_range = self.test_params['vlan_ip_range'] self.default_ip_range = self.test_params['default_ip_range'] @@ -410,8 +417,8 @@ def setUp(self): self.log("Converted addresses VMs: %s" % str(self.ssh_targets)) if self.preboot_oper is not None: self.log("Preboot Operations:") - self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh) - (self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails) = self.pre_handle.setup() + self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports) + (self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.pre_handle.setup() self.populate_fail_info(fails) for log in log_info: self.log(log) diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index 11852ea7822..bda06a49265 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -7,26 +7,30 @@ class PrebootTest(object): - def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports): self.oper_type = oper_type self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map self.test_args = test_args self.dut_ssh = dut_ssh + self.vlan_ports = vlan_ports self.fails_vm = set() self.fails_dut = set() self.log = [] - self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh) 
+ self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh, self.vlan_ports) def setup(self): self.shandle.sad_setup(is_up=False) return self.shandle.retreive_test_info(), self.shandle.retreive_logs() def verify(self, pre_check=True): - self.shandle.sad_bgp_verify() - if 'lag' in self.oper_type: - self.shandle.sad_lag_verify(pre_check=pre_check) + if 'vlan' in self.oper_type: + self.shandle.verify_vlan_port_state(pre_check=pre_check) + else: + self.shandle.sad_bgp_verify() + if 'lag' in self.oper_type: + self.shandle.sad_lag_verify(pre_check=pre_check) return self.shandle.retreive_logs() def revert(self): @@ -35,7 +39,7 @@ def revert(self): class SadPath(object): - def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports): self.oper_type = '' self.cnt = 1 self.memb_cnt = 0 @@ -43,6 +47,8 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map self.test_args = test_args + self.vlan_ports = vlan_ports + self.vlan_if_port = self.test_args['vlan_if_port'] self.neigh_vms = [] self.neigh_names = dict() self.vm_handles = dict() @@ -53,13 +59,16 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) self.fails['dut'] = set() self.tot_memb_cnt = 0 self.memb_index = 0 + self.if_port = [] + self.down_vlan_info = [] self.extract_oper_info(oper_type) def extract_oper_info(self, oper_type): if oper_type and ':' in oper_type: temp = oper_type.split(':') self.oper_type = temp[0] - # get number of VMs where the sad pass oper needs to be done + # get number of VMs where the sad pass oper needs to be done. 
For vlan_member case, + # this will be the number of down vlan ports self.cnt = int(temp[1]) if len(temp) > 2: # get the number of lag members in a portchannel that should be brought down @@ -122,6 +131,24 @@ def select_member(self): if self.tot_memb_cnt != 0: self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt + def select_vlan_ports(self): + self.if_port = sorted(self.vlan_if_port, key=lambda tup: tup[0]) + vlan_len = len(self.if_port) + vlan_index = datetime.datetime.now().day % vlan_len if vlan_len > 0 else 0 + exceed_len = vlan_index + self.cnt - vlan_len + if exceed_len <= 0: + self.down_vlan_info.extend(self.if_port[vlan_index:vlan_index+self.cnt]) + self.if_port = self.if_port[0:vlan_index] + self.if_port[vlan_index+self.cnt:] + else: + self.down_vlan_info.extend(self.if_port[vlan_index:]) + self.down_vlan_info.extend(self.if_port[0:exceed_len]) + self.if_port = self.if_port[exceed_len:exceed_len + vlan_len - self.cnt] + + def down_vlan_ports(self): + # extract the selected vlan ports and mark them down + for item in self.down_vlan_info: + self.vlan_ports.remove(item[1]) + def setup(self): self.select_vm() self.get_neigh_name() @@ -146,15 +173,15 @@ def setup(self): self.log.append('DUT BGP v6: %s' % self.dut_bgps[vm]['v6']) def retreive_test_info(self): - return self.vm_list, self.portchannel_ports, self.neigh_vms + return self.vm_list, self.portchannel_ports, self.neigh_vms, self.vlan_ports def retreive_logs(self): return self.log, self.fails class SadOper(SadPath): - def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh): - super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports): + super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports) self.dut_ssh = dut_ssh self.dut_needed = dict() self.lag_members_down = dict() @@ 
-185,10 +212,14 @@ def sad_setup(self, is_up=True): self.log = [] if not is_up: - self.setup() - self.populate_bgp_state() - if 'lag' in self.oper_type: - self.populate_lag_state() + if 'vlan' in self.oper_type: + self.select_vlan_ports() + self.down_vlan_ports() + else: + self.setup() + self.populate_bgp_state() + if 'lag' in self.oper_type: + self.populate_lag_state() if 'bgp' in self.oper_type: self.log.append('BGP state change will be for %s' % ", ".join(self.neigh_vms)) @@ -220,6 +251,47 @@ def sad_setup(self, is_up=True): # wait for sometime for lag members state to sync time.sleep(120) + elif 'vlan' in self.oper_type: + self.change_vlan_port_state(is_up=is_up) + + def change_vlan_port_state(self, is_up=True): + state = ['shutdown', 'startup'] + + for intf, port in self.down_vlan_info: + if not re.match('Ethernet\d+', intf): continue + self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)]) + if return_code != 0: + self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + else: + self.log.append('State change successful on DUT for %s' % intf) + + def verify_vlan_port_state(self, state='down', pre_check=True): + self.log = [] + # pattern match "Ethernet252 177,178,179,180 40G 9100 Ethernet64/1 routed down down QSFP28 off" + # extract the admin status + pat = re.compile('(\S+\s+){7}%s' % state) + for intf, port in self.down_vlan_info: + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces status %s' % intf]) + if return_code == 0: + for line in stdout.split('\n'): + if intf in line: + is_match = 
pat.match(line.strip()) + if is_match: + self.log.append('Interface state is down as expected on the DUT for %s' % intf) + self.log.append('Pattern check: %s' % line) + break + + else: + self.fails['dut'].add('%s: Interface state is not down on the DUT for %s' % (self.msg_prefix[pre_check], intf)) + self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line)) + else: + self.fails['dut'].add('%s: Retreiving interface %s info from DUT side failed' % (self.msg_prefix[pre_check], intf)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[pre_check], stderr)) + def change_bgp_dut_state(self, is_up=True): state = ['shutdown', 'startup'] for vm in self.neigh_vms: diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 31df0f62e4a..810c9ec6a5b 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -52,24 +52,6 @@ copy: src=roles/test/files/ptftests dest=/root delegate_to: "{{ ptf_host }}" - - name: Copy arp responder to the PTF container - copy: src=roles/test/files/helpers/arp_responder.py dest=/opt - delegate_to: "{{ ptf_host }}" - - - name: Copy arp responder supervisor configuration to the PTF container - template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf - vars: - - arp_responder_args: '-e' - delegate_to: "{{ ptf_host }}" - - - name: Reread supervisor configuration - shell: supervisorctl reread - delegate_to: "{{ ptf_host }}" - - - name: Update supervisor configuration - shell: supervisorctl update - delegate_to: "{{ ptf_host }}" - - name: Remove old keys file: path: "{{ item }}" diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml index 5262b0b3172..d0c59eae24b 100644 --- 
a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml +++ b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml @@ -2,6 +2,7 @@ item_cnt: "{{ item.split(':')[-1]|int }}" host_max_len: "{{ vm_hosts|length - 1 }}" member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" + vlan_max_cnt: "{{ minigraph_vlans.values()[0]['members']|length - 1 }}" - fail: msg="Bgp neigh down count is greater than or equal to number of VM hosts. Current val = {{ item_cnt }} Max val = {{ host_max_len }}" when: "{{ 'bgp_down' in item and item_cnt > host_max_len }}" @@ -11,3 +12,6 @@ - fail: msg="Lag member count is greater than available number of lag members. Current val = {{ item_cnt }} Available cnt = {{ member_max_cnt }}" when: "{{ 'lag_member_down' in item and item_cnt > member_max_cnt }}" + +- fail: msg="Vlan count is greater than or equal to number of Vlan interfaces. Current val = {{ item_cnt }} Max val = {{ vlan_max_cnt }}" + when: "{{ 'vlan_port_down' in item and item_cnt|int > vlan_max_cnt|int }}" diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index a8fe127bc84..9a39f152bf5 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -1,4 +1,30 @@ - block: + - name: Copy arp responder to the PTF container + copy: src=roles/test/files/helpers/arp_responder.py dest=/opt + delegate_to: "{{ ptf_host }}" + + - name: Copy arp responder supervisor configuration to the PTF container. No args when there is no preboot type + template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf + vars: + - arp_responder_args: '-e' + delegate_to: "{{ ptf_host }}" + when: not item or item == 'None' + + - name: Copy arp responder supervisor configuration to the PTF container. 
Specifying args when there is a preboot type + template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf + vars: + - arp_responder_args: '-e -c /tmp/from_t1_{{ item }}.json' + delegate_to: "{{ ptf_host }}" + when: item and item != 'None' + + - name: Update supervisor configuration + include: "roles/test/tasks/common_tasks/update_supervisor.yml" + vars: + supervisor_host: "{{ ptf_host }}" + + - name: Clear FDB entries on the DUT + command: sonic-clear fdb all + - include: ptf_runner.yml vars: ptf_test_name: Advanced-reboot test @@ -101,7 +127,6 @@ dest: '/tmp/' flat: yes - - name: Wait for the DUT to be ready for the next test pause: seconds=420 when: preboot_list|length > 1 diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml index 292a5684a94..4458fa2d747 100644 --- a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml +++ b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml @@ -6,7 +6,7 @@ # preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. 
for non lag member cases, this parameter will be skipped - name: Set vars set_fact: - pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1'] + pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1', 'vlan_port_down:4'] lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" - name: Add all lag member down case diff --git a/ansible/roles/test/tasks/warm-reboot-sad.yml b/ansible/roles/test/tasks/warm-reboot-sad.yml index 6ace17b74e0..6f37e34deed 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad.yml +++ b/ansible/roles/test/tasks/warm-reboot-sad.yml @@ -7,5 +7,5 @@ include: advanced-reboot.yml vars: reboot_type: warm-reboot - preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down'] + preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down', 'dut_lag_member_down:1:1', 'neigh_lag_member_down:1:1', 'vlan_port_down'] preboot_files: "peer_dev_info,neigh_port_info" From 839b363f2a70b5183487b79c7cffdd9d526b1d79 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Thu, 12 Sep 2019 07:28:15 +0800 Subject: [PATCH 097/218] Fix issue 2414 by always using static arp/neighbour entry (#1106) [neighbour-mac-noptf] Fix issue 2414 by always using static arp/neighbour entry (#1026) backport #1026 to 201811 --- ansible/library/show_interface.py | 13 ++-- .../roles/test/tasks/neighbour-mac-noptf.yml | 63 ++++++++----------- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/ansible/library/show_interface.py b/ansible/library/show_interface.py index cf6e2d91dda..e56b9852a6d 100644 --- a/ansible/library/show_interface.py +++ b/ansible/library/show_interface.py @@ -40,6 +40,7 @@ "name": "Ethernet0" "speed": "40G" "alias": "fortyGigE1/1/1" + "vlan": "routed" "oper_state": "down" "admin_state": 
"up" } @@ -82,7 +83,7 @@ def run(self): self.module.exit_json(ansible_facts=self.facts) def collect_interface_status(self): - regex_int = re.compile(r'(\S+)\s+[\d,]+\s+(\w+)\s+(\d+)\s+([\w\/]+)\s+(\w+)\s+(\w+)') + regex_int = re.compile(r'(\S+)\s+[\d,N\/A]+\s+(\w+)\s+(\d+)\s+([\w\/]+)\s+(\w+)\s+(\w+)\s+(\w+)') self.int_status = {} if self.m_args['interfaces'] is not None: for interface in self.m_args['interfaces']: @@ -96,8 +97,9 @@ def collect_interface_status(self): self.int_status[interface]['name'] = regex_int.match(line).group(1) self.int_status[interface]['speed'] = regex_int.match(line).group(2) self.int_status[interface]['alias'] = regex_int.match(line).group(4) - self.int_status[interface]['oper_state'] = regex_int.match(line).group(5) - self.int_status[interface]['admin_state'] = regex_int.match(line).group(6) + self.int_status[interface]['vlan'] = regex_int.match(line).group(5) + self.int_status[interface]['oper_state'] = regex_int.match(line).group(6) + self.int_status[interface]['admin_state'] = regex_int.match(line).group(7) self.facts['int_status'] = self.int_status except Exception as e: self.module.fail_json(msg=str(e)) @@ -114,8 +116,9 @@ def collect_interface_status(self): self.int_status[interface]['name'] = interface self.int_status[interface]['speed'] = regex_int.match(line).group(2) self.int_status[interface]['alias'] = regex_int.match(line).group(4) - self.int_status[interface]['oper_state'] = regex_int.match(line).group(5) - self.int_status[interface]['admin_state'] = regex_int.match(line).group(6) + self.int_status[interface]['vlan'] = regex_int.match(line).group(5) + self.int_status[interface]['oper_state'] = regex_int.match(line).group(6) + self.int_status[interface]['admin_state'] = regex_int.match(line).group(7) self.facts['int_status'] = self.int_status except Exception as e: self.module.fail_json(msg=str(e)) diff --git a/ansible/roles/test/tasks/neighbour-mac-noptf.yml b/ansible/roles/test/tasks/neighbour-mac-noptf.yml index 
25d9b7b5ab6..6beab8b8ac5 100644 --- a/ansible/roles/test/tasks/neighbour-mac-noptf.yml +++ b/ansible/roles/test/tasks/neighbour-mac-noptf.yml @@ -31,32 +31,32 @@ - name: init loganalyzer for later syslog analysis include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml - - name: gather DUT arp table - switch_arptable: - - ########## Test V4 mac address change ################# - - name: pick IPv4 neighbor to test change mac behavior + - name: gather DUT interface table + show_interface: command='status' + + # To pick an interface which is up and a routing interface for the test. + # For a routing interface, the item.value['vlan'] contains string 'routed'. + # The key name 'vlan' aligns with the title of column where its data comes. + # It doesn't mean to use a 'vlan' interface for the test. + - name: select A routing interface for testing set_fact: - v4_nei: "{{ item.key }}" - v4_intf: "{{ item.value['interface'] }}" - with_dict: "{{ arptable.v4 }}" + routing_interface: "{{ item.key }}" + with_dict: "{{int_status}}" when: - - ('Ethernet' in item.value['interface']) or ('PortChannel' in item.value['interface']) - - arptable.v4 | length != 0 + - ('routed' in item.value['vlan'] and 'up' in item.value['oper_state']) + - int_status | length != 0 - - name: select Ethernet0 if cannot find an v4 neighbor for test + ########## Test V4 mac address change ################# + - name: pick {{routing_interface}} to test change mac behavior set_fact: - v4_intf: "Ethernet0" + v4_intf: "{{routing_interface}}" v4_nei: "{{ v4_intf_nei }}" - when: v4_nei is not defined - - name: add an ip entry for Ethernet0 - command: "/sbin/ifconfig Ethernet0 {{ v4_intf_ip }}" - when: v4_nei is not defined + - name: add an ip entry for {{v4_intf}} + command: "config interface ip add {{ v4_intf }} {{ v4_intf_ip }}" - - name: add neighbor of Ethernet0 - command: "/sbin/ip neigh add {{ v4_intf_nei }} lladdr {{ v4_mac1 }} dev Ethernet0" - when: v4_nei is not defined + - name: add neighbor for 
{{v4_intf}} + command: "/sbin/ip neigh add {{ v4_intf_nei }} lladdr {{ v4_mac1 }} dev {{ v4_intf }}" - name: change v4 neighbor mac address 1st time command: "ip neigh change {{ v4_nei }} lladdr {{ v4_mac1 }} dev {{ v4_intf }}" @@ -91,29 +91,16 @@ - assert: { that: "neighbour_mac.stdout | lower == v4_mac2" } ############## Test V6 mac change ################## - - name: pick IPv6 neighbor to test change mac address" - set_fact: - v6_nei: "{{ item.key }}" - v6_intf: "{{ item.value['interface'] }}" - with_dict: "{{ arptable.v6 }}" - when: - - "'fe80::' not in item.key | lower" - - ('Ethernet' in item.value['interface']) or ('PortChannel' in item.value['interface']) - - arptable.v6 | length != 0 - - - name: pick Ethernet0 as test interface if cannot fine v6 neighbor to test + - name: pick {{routing_interface}} as test interface set_fact: - v6_intf: "Ethernet0" + v6_intf: "{{routing_interface}}" v6_nei: "{{ v6_intf_nei }}" - when: v6_nei is not defined - - name: add an ipv6 entry for Ethernet0 if not find v6 neighbor - command: "/sbin/ifconfig Ethernet0 inet6 add {{ v6_intf_ip }}" - when: v6_nei is not defined + - name: add an ipv6 entry for {{v6_intf}} + command: "config interface ip add {{v6_intf}} {{ v6_intf_ip }}" - - name: add an ipv6 neighbor of Ethernet0 if not find v6 neighbor to test - command: "/sbin/ip neigh add {{ v6_intf_nei }} lladdr {{ v6_mac1 }} dev Ethernet0" - when: v6_nei is not defined + - name: add an ipv6 neighbor for {{v6_intf}} + command: "/sbin/ip neigh add {{ v6_intf_nei }} lladdr {{ v6_mac1 }} dev {{v6_intf}}" - name: change v6 neighbor mac address 1st time command: "ip -6 neigh change {{ v6_nei }} lladdr {{ v6_mac1 }} dev {{ v6_intf }}" From 7843f70232e83df54c27e58a365a83d777b7e8ec Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 12 Sep 2019 07:52:41 -0700 Subject: [PATCH 098/218] [fib] address an error left in manual merge (#1110) Signed-off-by: Ying Xie --- ansible/roles/test/templates/fib.j2 | 5 ----- 1 file changed, 5 deletions(-) diff 
--git a/ansible/roles/test/templates/fib.j2 b/ansible/roles/test/templates/fib.j2 index 88d725ca75d..6044ba63ca4 100644 --- a/ansible/roles/test/templates/fib.j2 +++ b/ansible/roles/test/templates/fib.j2 @@ -31,12 +31,7 @@ 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 [0 1] [4 5] [16 17] [20 21] 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 [0 1] [4 5] [16 17] [20 21] -<<<<<<< HEAD - -{% elif testbed_type == 't0' or testbed_type == 't0-64' %} -======= {% elif testbed_type == 't0' or testbed_type == 't0-52' or testbed_type == 't0-64' or testbed_type == 't0-64-32' %} ->>>>>>> 8b2905c... Merge pull request #1105 from sumukhatv/m0-nightly {% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + (tor * props.max_tor_subnet_number * props.tor_subnet_size) + (subnet * props.tor_subnet_size) ) %} From 5e04d504271c88f13585cd6521a528bedd81747e Mon Sep 17 00:00:00 2001 From: Neetha John Date: Thu, 12 Sep 2019 14:24:52 -0700 Subject: [PATCH 099/218] Fix merge error for #1096 (#1112) --- .../test/tasks/common_tasks/update_supervisor.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 ansible/roles/test/tasks/common_tasks/update_supervisor.yml diff --git a/ansible/roles/test/tasks/common_tasks/update_supervisor.yml b/ansible/roles/test/tasks/common_tasks/update_supervisor.yml new file mode 100644 index 00000000000..45dba5f43cb --- /dev/null +++ b/ansible/roles/test/tasks/common_tasks/update_supervisor.yml @@ -0,0 +1,12 @@ +- fail: msg="supervisor_host is not defined" + when: "supervisor_host is not defined" + +- name: "Reread supervisor configuration" + shell: "supervisorctl reread" + become: "yes" + delegate_to: "{{ supervisor_host }}" + +- name: "Update supervisor configuration" + shell: "supervisorctl update" + become: "yes" + delegate_to: "{{ supervisor_host }}" From 845b7ace0127e9f591d2d805eae1ad78b2699ef7 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 16 Sep 2019 16:26:54 
-0700 Subject: [PATCH 100/218] Avoid FDB clear (#1116) Signed-off-by: Neetha John --- ansible/roles/test/tasks/ptf_runner_reboot.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 9a39f152bf5..b3d9004c965 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -22,9 +22,6 @@ vars: supervisor_host: "{{ ptf_host }}" - - name: Clear FDB entries on the DUT - command: sonic-clear fdb all - - include: ptf_runner.yml vars: ptf_test_name: Advanced-reboot test From 4aa0693c542d06eab08c492607f4576f3701b926 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 17 Sep 2019 22:53:41 +0800 Subject: [PATCH 101/218] Improve exit code return in shell scripts (#1117) In shell script, by default exit code of the last command in commands connected by pipe is returned. If previous command returned none zero exit code, the error is not captured. If ansible run such script in playbook, the script issue is ignored and ansible is not aware of it. The fix is to add "set -o pipefail" in shell scripts. And put the commands embedded in for loop statement to a separate statement. 
Signed-off-by: Xin Wang --- ansible/roles/test/files/helpers/change_mac.sh | 9 ++++++--- ansible/roles/test/files/helpers/remove_ip.sh | 7 +++++-- tests/scripts/change_mac.sh | 16 ++++++++-------- tests/scripts/remove_ip.sh | 7 +++++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/ansible/roles/test/files/helpers/change_mac.sh b/ansible/roles/test/files/helpers/change_mac.sh index 3f8b9869faf..21420658f03 100644 --- a/ansible/roles/test/files/helpers/change_mac.sh +++ b/ansible/roles/test/files/helpers/change_mac.sh @@ -1,10 +1,13 @@ #!/bin/bash -for i in $(ifconfig | grep eth | cut -f 1 -d ' ') -do +set -euo pipefail + +INTF_LIST=$(ifconfig | grep eth | cut -f 1 -d ' ') + +for i in ${INTF_LIST}; do prefix=$(ifconfig $i | grep HWaddr | cut -c39-53) suffix=$( printf "%02x" ${i##eth}) - mac=$prefix$suffix + mac=$prefix$suffix echo $i $mac ifconfig $i hw ether $mac done diff --git a/ansible/roles/test/files/helpers/remove_ip.sh b/ansible/roles/test/files/helpers/remove_ip.sh index 5ccc4b3a066..34432772de9 100755 --- a/ansible/roles/test/files/helpers/remove_ip.sh +++ b/ansible/roles/test/files/helpers/remove_ip.sh @@ -1,7 +1,10 @@ #!/bin/bash -set -e +set -euo pipefail -for i in `cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}'`; do +INTF_IDX_LIST=$(cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}') + +for i in ${INTF_IDX_LIST}; do + echo "Flush eth${i} IP address" ip address flush dev eth$i done diff --git a/tests/scripts/change_mac.sh b/tests/scripts/change_mac.sh index 7845f3f2395..21420658f03 100644 --- a/tests/scripts/change_mac.sh +++ b/tests/scripts/change_mac.sh @@ -1,13 +1,13 @@ #!/bin/bash -set -e +set -euo pipefail -for INTF in $(ip -br link show | grep 'eth' | awk '{sub(/@.*/,"",$1); print $1}'); do - ADDR="$(ip -br link show dev ${INTF} | awk '{print $3}')" - PREFIX="$(cut -c1-15 <<< ${ADDR})" - SUFFIX="$(printf "%02x" ${INTF##eth})" - MAC="${PREFIX}${SUFFIX}" +INTF_LIST=$(ifconfig | grep eth | cut -f 1 -d ' ') - echo 
"Update ${INTF} MAC address: ${ADDR}->$MAC" - ip link set dev ${INTF} address ${MAC} +for i in ${INTF_LIST}; do + prefix=$(ifconfig $i | grep HWaddr | cut -c39-53) + suffix=$( printf "%02x" ${i##eth}) + mac=$prefix$suffix + echo $i $mac + ifconfig $i hw ether $mac done diff --git a/tests/scripts/remove_ip.sh b/tests/scripts/remove_ip.sh index 5ccc4b3a066..34432772de9 100644 --- a/tests/scripts/remove_ip.sh +++ b/tests/scripts/remove_ip.sh @@ -1,7 +1,10 @@ #!/bin/bash -set -e +set -euo pipefail -for i in `cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}'`; do +INTF_IDX_LIST=$(cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}') + +for i in ${INTF_IDX_LIST}; do + echo "Flush eth${i} IP address" ip address flush dev eth$i done From 04badf678db340dd457eb3fba2dd113b5d1555f9 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Thu, 12 Sep 2019 01:49:24 +0300 Subject: [PATCH 102/218] [everflow_policer] stabilize policer test case (#1098) * [everflow_policer] sleep for (cir/cbs) seconds after checkOriginalFlow Signed-off-by: Stepan Blyschak * [everflow_policer] increase the number of packets to send 200 packets may be not enough to fill cbs (usually receive 197 packets back); to increase test stability increase the number of packets Signed-off-by: Stepan Blyschak --- ansible/roles/test/files/acstests/everflow_policer_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/files/acstests/everflow_policer_test.py b/ansible/roles/test/files/acstests/everflow_policer_test.py index 31249a244db..d89c340cc57 100644 --- a/ansible/roles/test/files/acstests/everflow_policer_test.py +++ b/ansible/roles/test/files/acstests/everflow_policer_test.py @@ -6,6 +6,7 @@ ''' +import time import ptf import ptf.packet as scapy import ptf.dataplane as dataplane @@ -16,7 +17,7 @@ class EverflowPolicerTest(BaseTest): GRE_PROTOCOL_NUMBER = 47 - NUM_OF_TOTAL_PACKETS = 200 + 
NUM_OF_TOTAL_PACKETS = 500 def __init__(self): @@ -157,6 +158,10 @@ def runTest(self): count = self.checkOriginalFlow() assert count == self.NUM_OF_TOTAL_PACKETS + # Sleep for t=CBS/CIR=(100packets)/(100packets/s)=1s to refill CBS capacity after checkOriginalFlow() + # otherwise we can have first mirrored packet dropped by policer in checkMirroredFlow() + time.sleep(1) + testutils.add_filter(self.greFilter) # Send traffic and verify the mirroed traffic is rate limited From 779eadabb1c1a0b08416d32d03426b9489e99106 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 17 Sep 2019 09:45:59 -0700 Subject: [PATCH 103/218] [everflow]: Add pause to stablize the test (#1102) --- ansible/roles/test/tasks/everflow_testbed/testcase_8.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml index 57876c6651b..7fb8ed3b36a 100644 --- a/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml +++ b/ansible/roles/test/tasks/everflow_testbed/testcase_8.yml @@ -29,6 +29,9 @@ redis-cli -n 4 hmset "ACL_RULE|{{dscp_table_name}}|RULE_1" "PRIORITY" "9999" "MIRROR_ACTION" "{{policer_session_name}}" "DSCP" "8/56" become: yes + - name: Pause to sync the rule + pause: seconds=3 + - name: "Start PTF runner" include: roles/test/tasks/ptf_runner.yml vars: From 8878e431c7ed55296e4af6307081a8d7d539472b Mon Sep 17 00:00:00 2001 From: Sumukha Tumkur Vani Date: Wed, 18 Sep 2019 16:31:19 -0700 Subject: [PATCH 104/218] Merge pull request #1118 from bbinxie/patch-3 update sku-sensors-data.yml --- ansible/group_vars/sonic/sku-sensors-data.yml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index 029ed4fc7a8..1ca98b919f3 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -1964,3 +1964,28 @@ sensors_checks: - 
pch_haswell-virtual-0/temp1/temp1_input psu_skips: {} + + Celestica-E1031-T48S4: + alarms: + fan: [] + power: [] + temp: + - coretemp-isa-0000/Core 0/temp2_crit_alarm + - coretemp-isa-0000/Core 1/temp3_crit_alarm + compares: + fan: [] + power: [] + temp: + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_crit + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_max + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_crit + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_max + non_zero: + fan: [] + power: [] + temp: [] + psu_skips: {} From dc5fba093b154608a27685dfe5538879618fd2b0 Mon Sep 17 00:00:00 2001 From: yvolynets-mlnx <50697593+yvolynets-mlnx@users.noreply.github.com> Date: Thu, 19 Sep 2019 16:24:39 +0300 Subject: [PATCH 105/218] Make link flap timeout configurable. (#1062) Signed-off-by: Yuriy Volynets --- ansible/roles/test/tasks/link_flap/link_flap_helper.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/link_flap/link_flap_helper.yml b/ansible/roles/test/tasks/link_flap/link_flap_helper.yml index ef3ae6dd81f..5c9f5f59f10 100644 --- a/ansible/roles/test/tasks/link_flap/link_flap_helper.yml +++ b/ansible/roles/test/tasks/link_flap/link_flap_helper.yml @@ -10,6 +10,10 @@ - name: Set default link timeout set_fact: link_timeout: 20 + when: link_timeout is not defined + + - name: Set default link delay + set_fact: link_delay: 5 - set_fact: @@ -50,7 +54,7 @@ interface_facts: up_ports="[ '{{ interface }}' ]" register: out until: out.ansible_facts.ansible_interface_link_down_ports | length > 0 - retries: "{{ (link_timeout / link_delay) | round(0, 'ceil') | int }}" + retries: "{{ ((link_timeout | int) / link_delay) | round(0, 'ceil') | int }}" delay: "{{ link_delay }}" when: "interface in minigraph_ports.keys()" @@ -84,7 +88,7 @@ interface_facts: up_ports="[ '{{ interface }}' ]" register: out until: 
out.ansible_facts.ansible_interface_link_down_ports | length == 0 - retries: "{{ (link_timeout / link_delay) | round(0, 'ceil') | int }}" + retries: "{{ ((link_timeout | int) / link_delay) | round(0, 'ceil') | int }}" delay: "{{ link_delay }}" when: "interface in minigraph_ports.keys()" From 2eba20a643c00023a144837bd314bafd88130a81 Mon Sep 17 00:00:00 2001 From: Iris Hsu Date: Wed, 3 Jul 2019 02:19:01 +0800 Subject: [PATCH 106/218] Update CRM test to handle acl counter on different platforms. (#983) * Update CRM test to handle acl counter on different platforms. * Add test case : Verify "crm_stats_acl_counter_available" counter was equal to original value. --- ansible/roles/test/tasks/crm/crm_test_acl_counter.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml b/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml index 9b536a3ee6a..5beca76991a 100644 --- a/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml +++ b/ansible/roles/test/tasks/crm/crm_test_acl_counter.yml @@ -75,4 +75,7 @@ assert: {that: "{{new_crm_stats_acl_counter_used|int - crm_stats_acl_counter_used|int == 0}}"} - name: Verify "crm_stats_acl_counter_available" counter was incremented - assert: {that: "{{new_crm_stats_acl_counter_available|int - crm_stats_acl_counter_available|int == 0}}"} + assert: {that: "{{new_crm_stats_acl_counter_available|int - crm_stats_acl_counter_available|int >= 0}}"} + + - name: Verify "crm_stats_acl_counter_available" counter was equal to original value + assert: {that: "{{original_crm_stats_acl_counter_available|int - new_crm_stats_acl_counter_available|int == 0}}"} From f5be0a6b759fe4f8bd11d2a5a1faff97994f6752 Mon Sep 17 00:00:00 2001 From: yvolynets-mlnx <50697593+yvolynets-mlnx@users.noreply.github.com> Date: Wed, 22 May 2019 12:07:13 +0300 Subject: [PATCH 107/218] Update CRM test to support SPC2 (#916) Change-Id: If66887b1cd32104f9991be652ea836cbec9b2f48 --- 
ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml | 2 +- ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml | 4 ++-- ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml | 2 +- ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml | 2 +- ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml | 4 ++-- .../roles/test/tasks/crm/crm_test_nexthop_group_member.yml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml b/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml index 62221ec2f11..01151701a33 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv4_neighbor.yml @@ -25,7 +25,7 @@ assert: {that: "{{new_crm_stats_ipv4_neighbor_used|int - crm_stats_ipv4_neighbor_used|int >= 1}}"} - name: Verify "crm_stats_ipv4_neighbor_available" counter was decremented - assert: {that: "{{crm_stats_ipv4_neighbor_available|int - new_crm_stats_ipv4_neighbor_available|int == 1}}"} + assert: {that: "{{crm_stats_ipv4_neighbor_available|int - new_crm_stats_ipv4_neighbor_available|int >= 1}}"} - name: Remove IPv4 neighbor command: ip neigh del 2.2.2.2 lladdr 11:22:33:44:55:66 dev {{crm_intf}} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml b/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml index 733341268c5..896345684cd 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv4_nexthop.yml @@ -22,10 +22,10 @@ new_crm_stats_ipv4_nexthop_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv4_nexthop_used" counter was incremented - assert: {that: "{{new_crm_stats_ipv4_nexthop_used|int - crm_stats_ipv4_nexthop_used|int == 1}}"} + assert: {that: "{{new_crm_stats_ipv4_nexthop_used|int - crm_stats_ipv4_nexthop_used|int >= 1}}"} - name: Verify "crm_stats_ipv4_nexthop_available" counter was decremented - assert: {that: 
"{{crm_stats_ipv4_nexthop_available|int - new_crm_stats_ipv4_nexthop_available|int == 1}}"} + assert: {that: "{{crm_stats_ipv4_nexthop_available|int - new_crm_stats_ipv4_nexthop_available|int >= 1}}"} - name: Remove IPv4 nexthop command: ip neigh del 2.2.2.2 lladdr 11:22:33:44:55:66 dev {{crm_intf}} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml b/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml index 58f8b8aa67f..779e0be0918 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_neighbor.yml @@ -25,7 +25,7 @@ assert: {that: "{{new_crm_stats_ipv6_neighbor_used|int - crm_stats_ipv6_neighbor_used|int >= 1}}"} - name: Verify "crm_stats_ipv6_neighbor_available" counter was decremented - assert: {that: "{{crm_stats_ipv6_neighbor_available|int - new_crm_stats_ipv6_neighbor_available|int == 1}}"} + assert: {that: "{{crm_stats_ipv6_neighbor_available|int - new_crm_stats_ipv6_neighbor_available|int >= 1}}"} - name: Remove IPv6 neighbor command: ip neigh del 2001::1 lladdr 11:22:33:44:55:66 dev {{crm_intf}} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml b/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml index 15147374908..7b128c9b529 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_nexthop.yml @@ -25,7 +25,7 @@ assert: {that: "{{new_crm_stats_ipv6_nexthop_used|int - crm_stats_ipv6_nexthop_used|int == 1}}"} - name: Verify "crm_stats_ipv6_nexthop_available" counter was decremented - assert: {that: "{{crm_stats_ipv6_nexthop_available|int - new_crm_stats_ipv6_nexthop_available|int == 1}}"} + assert: {that: "{{crm_stats_ipv6_nexthop_available|int - new_crm_stats_ipv6_nexthop_available|int >= 1}}"} - name: Remove IPv6 nexthop command: ip neigh del 2001::1 lladdr 11:22:33:44:55:66 dev {{crm_intf}} diff --git a/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml 
b/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml index 5cf7b662b97..a7a0daa98e4 100644 --- a/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml +++ b/ansible/roles/test/tasks/crm/crm_test_ipv6_route.yml @@ -27,10 +27,10 @@ new_crm_stats_ipv6_route_available: "{{ out.stdout_lines[1] }}" - name: Verify "crm_stats_ipv6_route_used" counter was incremented - assert: {that: "{{new_crm_stats_ipv6_route_used|int - crm_stats_ipv6_route_used|int == 1}}"} + assert: {that: "{{new_crm_stats_ipv6_route_used|int - crm_stats_ipv6_route_used|int >= 1}}"} - name: Verify "crm_stats_ipv6_route_available" counter was decremented - assert: {that: "{{crm_stats_ipv6_route_available|int - new_crm_stats_ipv6_route_available|int == 1}}"} + assert: {that: "{{crm_stats_ipv6_route_available|int - new_crm_stats_ipv6_route_available|int >= 1}}"} - name: Remove IPv6 route command: ip -6 route del 2001::/126 via {{nh_ip}} diff --git a/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml b/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml index 1a24af51da8..9033d3cc8ef 100644 --- a/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml +++ b/ansible/roles/test/tasks/crm/crm_test_nexthop_group_member.yml @@ -35,7 +35,7 @@ assert: {that: "{{new_crm_stats_nexthop_group_member_used|int - crm_stats_nexthop_group_member_used|int == 2}}"} - name: Verify "crm_stats_nexthop_group_member_available" counter was decremented - assert: {that: "{{crm_stats_nexthop_group_member_available|int - new_crm_stats_nexthop_group_member_available|int == 2}}"} + assert: {that: "{{crm_stats_nexthop_group_member_available|int - new_crm_stats_nexthop_group_member_available|int >= 2}}"} - name: Remove nexthop group members command: ip route del 2.2.2.0/24 nexthop via {{nh_ip1}} nexthop via {{nh_ip2}} From 87ec6e4109f70a40cbe901d0c498b9fd6361b1aa Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Thu, 4 Apr 2019 21:58:33 -0700 Subject: [PATCH 108/218] [minigraph_facts] Refactor such that port 
names and aliases are never stored together (#832) * [minigraph_facts] Refactor such that port names and aliases are never stored together * Update affected playbooks to reflect the new change --- ansible/library/minigraph_facts.py | 135 ++++++++----------- ansible/roles/test/tasks/acltb.yml | 2 +- ansible/roles/test/tasks/dhcp_relay.yml | 2 +- ansible/roles/test/tasks/snmp/interfaces.yml | 6 +- 4 files changed, 64 insertions(+), 81 deletions(-) diff --git a/ansible/library/minigraph_facts.py b/ansible/library/minigraph_facts.py index 434b20d9dd9..6981a4acf63 100644 --- a/ansible/library/minigraph_facts.py +++ b/ansible/library/minigraph_facts.py @@ -73,12 +73,12 @@ def parse_png(png, hname): startport = link.find(str(QName(ns, "StartPort"))).text if enddevice == hname: - if port_alias_map.has_key(endport): - endport = port_alias_map[endport] + if port_alias_to_name_map.has_key(endport): + endport = port_alias_to_name_map[endport] neighbors[endport] = {'name': startdevice, 'port': startport} else: - if port_alias_map.has_key(startport): - startport = port_alias_map[startport] + if port_alias_to_name_map.has_key(startport): + startport = port_alias_to_name_map[startport] neighbors[startport] = {'name': enddevice, 'port': endport} if child.tag == str(QName(ns, "Devices")): @@ -133,8 +133,8 @@ def parse_dpg(dpg, hname): intfs = [] for ipintf in ipintfs.findall(str(QName(ns, "IPInterface"))): intfalias = ipintf.find(str(QName(ns, "AttachTo"))).text - if port_alias_map.has_key(intfalias): - intfname = port_alias_map[intfalias] + if port_alias_to_name_map.has_key(intfalias): + intfname = port_alias_to_name_map[intfalias] else: intfname = intfalias ipprefix = ipintf.find(str(QName(ns, "Prefix"))).text @@ -210,8 +210,8 @@ def parse_dpg(dpg, hname): pcintfmbr = pcintf.find(str(QName(ns, "AttachTo"))).text pcmbr_list = pcintfmbr.split(';', 1) for i, member in enumerate(pcmbr_list): - pcmbr_list[i] = port_alias_map[member] - ports[port_alias_map[member]] = {'name': 
port_alias_map[member], 'alias': member} + pcmbr_list[i] = port_alias_to_name_map[member] + ports[port_alias_to_name_map[member]] = {'name': port_alias_to_name_map[member], 'alias': member} pcs[pcintfname] = {'name': pcintfname, 'members': pcmbr_list} ports.pop(pcintfname) @@ -231,8 +231,8 @@ def parse_dpg(dpg, hname): vlandhcpservers = "" dhcp_servers = vlandhcpservers.split(";") for i, member in enumerate(vmbr_list): - vmbr_list[i] = port_alias_map[member] - ports[port_alias_map[member]] = {'name': port_alias_map[member], 'alias': member} + vmbr_list[i] = port_alias_to_name_map[member] + ports[port_alias_to_name_map[member]] = {'name': port_alias_to_name_map[member], 'alias': member} vlan_attributes = {'name': vintfname, 'members': vmbr_list, 'vlanid': vlanid} vlans[vintfname] = vlan_attributes ports.pop(vintfname) @@ -249,8 +249,8 @@ def parse_dpg(dpg, hname): acl_intfs.extend(pcs[member]['members']) # For ACL attaching to port channels, we break them into port channel members elif vlans.has_key(member): print >> sys.stderr, "Warning: ACL " + aclname + " is attached to a Vlan interface, which is currently not supported" - elif port_alias_map.has_key(member): - acl_intfs.append(port_alias_map[member]) + elif port_alias_to_name_map.has_key(member): + acl_intfs.append(port_alias_to_name_map[member]) if acl_intfs: acls[aclname] = acl_intfs @@ -379,7 +379,7 @@ def reconcile_mini_graph_locations(filename, hostname): 3. .ansible/minigraph/ folder (<24 hrs old) 4. 
Network Graph Service - post-NGS download, cache to the user folder: + post-download, cache to the user folder: ~/.ansible/minigraph/HOSTNAME_minigraph.xml :param filename: the filename to load (may be None) @@ -396,18 +396,18 @@ def reconcile_mini_graph_locations(filename, hostname): root = ET.parse(mini_graph_path).getroot() return mini_graph_path, root -def port_alias_map_50G(all_ports, s100G_ports): +def port_alias_to_name_map_50G(all_ports, s100G_ports): # 50G ports s50G_ports = list(set(all_ports) - set(s100G_ports)) for i in s50G_ports: - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) - port_alias_map["Ethernet%d/3" % i] = "Ethernet%d" % ((i - 1) * 4 + 2) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/3" % i] = "Ethernet%d" % ((i - 1) * 4 + 2) for i in s100G_ports: - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) - return port_alias_map + return port_alias_to_name_map def parse_xml(filename, hostname): mini_graph_path, root = reconcile_mini_graph_locations(filename, hostname) @@ -442,34 +442,34 @@ def parse_xml(filename, hostname): if child.tag == str(hostname_qn): hostname = child.text - global port_alias_map - # port_alias_map maps ngs port name to sonic port name + global port_alias_to_name_map + if hwsku == "Force10-S6000": for i in range(0, 128, 4): - port_alias_map["fortyGigE0/%d" % i] = "Ethernet%d" % i + port_alias_to_name_map["fortyGigE0/%d" % i] = "Ethernet%d" % i elif hwsku == "Force10-S6100": for i in range(0, 4): for j in range(0, 16): - port_alias_map["fortyGigE1/%d/%d" % (i+1, j+1)] = "Ethernet%d" % (i * 16 + j) + port_alias_to_name_map["fortyGigE1/%d/%d" % (i+1, j+1)] = "Ethernet%d" % (i * 16 + j) elif hwsku == "Force10-Z9100": for i in range(0, 128, 4): - port_alias_map["hundredGigE1/%d" % (i/4 + 1)] = "Ethernet%d" % i + port_alias_to_name_map["hundredGigE1/%d" % 
(i/4 + 1)] = "Ethernet%d" % i elif hwsku == "Arista-7050-QX32": for i in range(1, 25): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) for i in range(25, 33): - port_alias_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Arista-7050-QX-32S": for i in range(5, 29): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 5) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 5) * 4) for i in range(29, 37): - port_alias_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 5) * 4) + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 5) * 4) elif hwsku == "Arista-7260CX3-C64" or hwsku == "Arista-7170-64C": for i in range(1, 65): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Arista-7060CX-32S-C32" or hwsku == "Arista-7060CX-32S-C32-T1": for i in range(1, 33): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Mellanox-SN2700-D48C8": # 50G ports s50G_ports = [x for x in range(0, 24, 2)] + [x for x in range(40, 88, 2)] + [x for x in range(104, 128, 2)] @@ -479,13 +479,13 @@ def parse_xml(filename, hostname): for i in s50G_ports: alias = "etp%d" % (i / 4 + 1) + ("a" if i % 4 == 0 else "b") - port_alias_map[alias] = "Ethernet%d" % i + port_alias_to_name_map[alias] = "Ethernet%d" % i for i in s100G_ports: alias = "etp%d" % (i / 4 + 1) - port_alias_map[alias] = "Ethernet%d" % i + port_alias_to_name_map[alias] = "Ethernet%d" % i elif hwsku == "Mellanox-SN2700" or hwsku == "ACS-MSN2700": for i in range(1, 33): - port_alias_map["etp%d" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif 
hwsku == "Arista-7060CX-32S-D48C8": # All possible breakout 50G port numbers: all_ports = [ x for x in range(1, 33)] @@ -494,7 +494,7 @@ def parse_xml(filename, hostname): s100G_ports = [ x for x in range(7, 11) ] s100G_ports += [ x for x in range(23, 27) ] - port_alias_map = port_alias_map_50G(all_ports, s100G_ports) + port_alias_to_name_map = port_alias_to_name_map_50G(all_ports, s100G_ports) elif hwsku == "Arista-7260CX3-D108C8": # All possible breakout 50G port numbers: all_ports = [ x for x in range(1, 65)] @@ -502,38 +502,38 @@ def parse_xml(filename, hostname): # 100G ports s100G_ports = [ x for x in range(13, 21) ] - port_alias_map = port_alias_map_50G(all_ports, s100G_ports) + port_alias_to_name_map = port_alias_to_name_map_50G(all_ports, s100G_ports) elif hwsku == "INGRASYS-S9100-C32": for i in range(1, 33): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "INGRASYS-S9100-C32" or hwsku == "INGRASYS-S9130-32X" or hwsku == "INGRASYS-S8810-32Q": for i in range(1, 33): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "INGRASYS-S8900-54XC": for i in range(1, 49): - port_alias_map["Ethernet%d" % i] = "Ethernet%d" % (i - 1) + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % (i - 1) for i in range(49, 55): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 49) * 4 + 48) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 49) * 4 + 48) elif hwsku == "INGRASYS-S8900-64XC": for i in range(1, 49): - port_alias_map["Ethernet%d" % i] = "Ethernet%d" % (i - 1) + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % (i - 1) for i in range(49, 65): - port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 49) * 4 + 48) + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 49) * 4 + 48) elif hwsku == 
"Accton-AS7712-32X": for i in range(1, 33): - port_alias_map["hundredGigE%d" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["hundredGigE%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Celestica-DX010-C32": for i in range(1, 33): - port_alias_map["etp%d" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Seastone-DX010": for i in range(1, 33): - port_alias_map["Eth%d" % i] = "Ethernet%d" % ((i - 1) * 4) + port_alias_to_name_map["Eth%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Celestica-E1031-T48S4": for i in range(1, 53): - port_alias_map["etp%d" % i] = "Ethernet%d" % ((i - 1)) + port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % ((i - 1)) else: for i in range(0, 128, 4): - port_alias_map["Ethernet%d" % i] = "Ethernet%d" % i + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % i for child in root: if child.tag == str(QName(ns, "DpgDec")): @@ -547,41 +547,24 @@ def parse_xml(filename, hostname): elif child.tag == str(QName(ns, "MetadataDeclaration")): (syslog_servers, ntp_servers, mgmt_routes, deployment_id) = parse_meta(child, hostname) - # Create port index map. Since we currently output a mix of NGS names - # and SONiC mapped names, we include both in this map. - # SONiC aliases, when sorted in natural sort order, match the phyical port - # index order, so we sort by SONiC port alias, and map - # back to NGS names after sorting using this inverted map - # # TODO: Move all alias-related code out of minigraph_facts.py and into # its own module to be used as another layer after parsing the minigraph. 
- inverted_port_alias_map = {v: k for k, v in port_alias_map.iteritems()} - # Start by creating a list of all port aliases - port_alias_list = [] - for k, v in port_alias_map.iteritems(): - port_alias_list.append(v) + # Create inverse mapping between port name and alias + port_name_to_alias_map = {v: k for k, v in port_alias_to_name_map.iteritems()} + + # Create a map of SONiC port name to physical port index + # Start by creating a list of all port names + port_name_list = port_name_to_alias_map.keys() - # Sort the list in natural order - port_alias_list_sorted = natsorted(port_alias_list) + # Sort the list in natural order, because SONiC port names, when + # sorted in natural sort order, match the phyical port index order + port_name_list_sorted = natsorted(port_name_list) - # Create map from SONiC alias to physical index and NGS name to physical index + # Create mapping between port alias and physical index port_index_map = {} - for idx, val in enumerate(port_alias_list_sorted): + for idx, val in enumerate(port_name_list_sorted): port_index_map[val] = idx - port_index_map[inverted_port_alias_map[val]] = idx - - # Create maps: - # from SONiC phy iface name to NGS phy iface name - # from NGS phy iface name to SONiC phy iface name - # These maps include mappings from original name to original name too - iface_map_sonic_to_ngs = {} - iface_map_ngs_to_sonic = {} - for val in port_alias_list_sorted: - iface_map_sonic_to_ngs[val] = inverted_port_alias_map[val] - iface_map_sonic_to_ngs[inverted_port_alias_map[val]] = inverted_port_alias_map[val] - iface_map_ngs_to_sonic[inverted_port_alias_map[val]] = val - iface_map_ngs_to_sonic[val] = val # Generate results Tree = lambda: defaultdict(Tree) @@ -622,8 +605,8 @@ def parse_xml(filename, hostname): results['minigraph_underlay_neighbors'] = u_neighbors results['minigraph_underlay_devices'] = u_devices results['minigraph_port_indices'] = port_index_map - results['minigraph_map_sonic_to_ngs'] = iface_map_sonic_to_ngs - 
results['minigraph_map_ngs_to_sonic'] = iface_map_ngs_to_sonic + results['minigraph_port_name_to_alias_map'] = port_name_to_alias_map + results['minigraph_port_alias_to_name_map'] = port_alias_to_name_map results['minigraph_as_xml'] = mini_graph_path if devices != None: results['minigraph_console'] = get_console_info(devices, console_dev, console_port) @@ -638,7 +621,7 @@ def parse_xml(filename, hostname): return results ports = {} -port_alias_map = {} +port_alias_to_name_map = {} def main(): module = AnsibleModule( diff --git a/ansible/roles/test/tasks/acltb.yml b/ansible/roles/test/tasks/acltb.yml index b06eb4f9302..33515340b49 100644 --- a/ansible/roles/test/tasks/acltb.yml +++ b/ansible/roles/test/tasks/acltb.yml @@ -10,7 +10,7 @@ - name: Read port reverse alias mapping set_fact: - alias_reverse_map: "{{ minigraph_map_ngs_to_sonic }}" + alias_reverse_map: "{{ minigraph_port_alias_to_name_map }}" podset_number: 200 # Copy ACL config to the switch diff --git a/ansible/roles/test/tasks/dhcp_relay.yml b/ansible/roles/test/tasks/dhcp_relay.yml index 9829980a5f0..9827f4ef0b6 100644 --- a/ansible/roles/test/tasks/dhcp_relay.yml +++ b/ansible/roles/test/tasks/dhcp_relay.yml @@ -1,7 +1,7 @@ # We choose client port index to be index of first port on Vlan - name: Obtain client interface alias set_fact: - client_iface_alias: "{{ minigraph_map_sonic_to_ngs[minigraph_vlans[minigraph_vlans.keys()[0]]['members'][0]] }}" + client_iface_alias: "{{ minigraph_vlans[minigraph_vlans.keys()[0]]['members'][0] }}" - name: Obtain client port index set_fact: diff --git a/ansible/roles/test/tasks/snmp/interfaces.yml b/ansible/roles/test/tasks/snmp/interfaces.yml index c84277d3164..fbb09c07488 100644 --- a/ansible/roles/test/tasks/snmp/interfaces.yml +++ b/ansible/roles/test/tasks/snmp/interfaces.yml @@ -27,11 +27,11 @@ set_fact: mg_intf: "{{ mg_intf + [minigraph_mgmt_interface.alias] }}" -- debug: var=minigraph_map_sonic_to_ngs +- debug: var=minigraph_port_name_to_alias_map - debug: 
var=snmp_intf - debug: var=mg_intf - name: Check for missing interfaces in SNMP - fail: msg="Minigraph interface {{ minigraph_map_sonic_to_ngs[item] if item in minigraph_map_sonic_to_ngs else item }} not in SNMP interfaces" - when: "{{ (item in minigraph_map_sonic_to_ngs and minigraph_map_sonic_to_ngs[item] not in snmp_intf) or (item not in minigraph_map_sonic_to_ngs and item not in snmp_intf) }}" + fail: msg="Minigraph interface {{ minigraph_port_name_to_alias_map[item] if item in minigraph_port_name_to_alias_map else item }} not in SNMP interfaces" + when: "{{ (item in minigraph_port_name_to_alias_map and minigraph_port_name_to_alias_map[item] not in snmp_intf) or (item not in minigraph_port_name_to_alias_map and item not in snmp_intf) }}" with_items: "{{ mg_intf }}" From 0886551e5c242966ab94a5c8e2898c380701217c Mon Sep 17 00:00:00 2001 From: Gord Chen Date: Wed, 10 Jul 2019 03:23:30 +0800 Subject: [PATCH 109/218] [dhcp_relay] Correct to use port alias instead of port name (#998) The dhcp_relay daemon uses the port interface's alias, not the port interface's name, to fill the option82 circuit ID. The current yml uses the minigraph_vlans member as the client port alias, but minigraph_vlans contains only port interface names. 
Therefore use minigraph_port_name_to_alias_map to obtain the port interface's alias --- ansible/roles/test/tasks/dhcp_relay.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ansible/roles/test/tasks/dhcp_relay.yml b/ansible/roles/test/tasks/dhcp_relay.yml index 9827f4ef0b6..da1b5dbc682 100644 --- a/ansible/roles/test/tasks/dhcp_relay.yml +++ b/ansible/roles/test/tasks/dhcp_relay.yml @@ -1,11 +1,15 @@ # We choose client port index to be index of first port on Vlan -- name: Obtain client interface alias +- name: Obtain client interface name set_fact: - client_iface_alias: "{{ minigraph_vlans[minigraph_vlans.keys()[0]]['members'][0] }}" + client_iface_name: "{{ minigraph_vlans[minigraph_vlans.keys()[0]]['members'][0] }}" +- name: Obtain client interface alias + set_fact: + client_iface_alias: "{{ minigraph_port_name_to_alias_map[client_iface_name] }}" + - name: Obtain client port index set_fact: - client_port_index: "{{ minigraph_port_indices[client_iface_alias] }}" + client_port_index: "{{ minigraph_port_indices[client_iface_name] }}" - name: Obtain leaf port indices set_fact: From dc4ef33f19bb61f758bc1f93f86c40315c3084ac Mon Sep 17 00:00:00 2001 From: zzhiyuan Date: Fri, 27 Sep 2019 12:37:34 -0700 Subject: [PATCH 110/218] Add SKU sensors data for Arista-7050-QX-32S (#1132) --- ansible/group_vars/sonic/sku-sensors-data.yml | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index 1ca98b919f3..c4b01b23b45 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -1989,3 +1989,86 @@ sensors_checks: power: [] temp: [] psu_skips: {} + + Arista-7050-QX-32S: + alarms: + fan: + - pmbus-i2c-5-58/fan1/fan1_alarm + - pmbus-i2c-6-58/fan1/fan1_alarm + + power: + - pmbus-i2c-5-58/iin/curr1_max_alarm + - pmbus-i2c-5-58/iout1/curr2_crit_alarm + - 
pmbus-i2c-5-58/iout1/curr2_max_alarm + - pmbus-i2c-5-58/iout2/curr3_crit_alarm + - pmbus-i2c-5-58/vin/in1_alarm + - pmbus-i2c-5-58/vout1/in2_crit_alarm + - pmbus-i2c-5-58/vout1/in2_lcrit_alarm + + temp: + - max6658-i2c-2-4c/Board temp sensor/temp1_max_alarm + - max6658-i2c-2-4c/Board temp sensor/temp1_min_alarm + - max6658-i2c-2-4c/Board temp sensor/temp1_crit_alarm + - max6658-i2c-2-4c/Front panel temp sensor/temp2_max_alarm + - max6658-i2c-2-4c/Front panel temp sensor/temp2_min_alarm + - max6658-i2c-2-4c/Front panel temp sensor/temp2_crit_alarm + - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max_alarm + - max6658-i2c-3-4c/Cpu board temp sensor/temp1_min_alarm + - max6658-i2c-3-4c/Cpu board temp sensor/temp1_crit_alarm + - max6658-i2c-3-4c/Back panel temp sensor/temp2_max_alarm + - max6658-i2c-3-4c/Back panel temp sensor/temp2_min_alarm + - max6658-i2c-3-4c/Back panel temp sensor/temp2_crit_alarm + - pmbus-i2c-5-58/Power supply 2 hotspot sensor/temp1_alarm + - pmbus-i2c-5-58/Power supply 2 inlet temp sensor/temp2_alarm + - pmbus-i2c-5-58/Power supply 2 sensor/temp3_alarm + + compares: + fan: [] + power: + - - pmbus-i2c-5-58/iin/curr1_input + - pmbus-i2c-5-58/iin/curr1_max + - - pmbus-i2c-5-58/iout1/curr2_input + - pmbus-i2c-5-58/iout1/curr2_max + + temp: + - - k10temp-pci-00c3/Cpu temp sensor/temp1_input + - k10temp-pci-00c3/Cpu temp sensor/temp1_max + - - max6658-i2c-2-4c/Board temp sensor/temp1_input + - max6658-i2c-2-4c/Board temp sensor/temp1_max + - - max6658-i2c-2-4c/Front panel temp sensor/temp2_input + - max6658-i2c-2-4c/Front panel temp sensor/temp2_max + - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_input + - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max + - - max6658-i2c-3-4c/Back panel temp sensor/temp2_input + - max6658-i2c-3-4c/Back panel temp sensor/temp2_max + + non_zero: + fan: + - crow_cpld-i2c-3-60/fan1/fan1_input + - crow_cpld-i2c-3-60/fan2/fan2_input + - crow_cpld-i2c-3-60/fan3/fan3_input + - crow_cpld-i2c-3-60/fan4/fan4_input + - 
pmbus-i2c-5-58/fan1/fan1_input + - pmbus-i2c-6-58/fan1/fan1_input + + power: + - pmbus-i2c-5-58/iin/curr1_input + - pmbus-i2c-5-58/iout1/curr2_input + - pmbus-i2c-5-58/iout2/curr3_input + - pmbus-i2c-5-58/pin/power1_input + - pmbus-i2c-5-58/pout1/power2_input + - pmbus-i2c-5-58/pout2/power3_input + - pmbus-i2c-5-58/vin/in1_input + - pmbus-i2c-5-58/vout1/in2_input + + temp: + - k10temp-pci-00c3/Cpu temp sensor/temp1_input + - max6658-i2c-2-4c/Board temp sensor/temp1_input + - max6658-i2c-2-4c/Front panel temp sensor/temp2_input + - max6658-i2c-3-4c/Cpu board temp sensor/temp1_input + - max6658-i2c-3-4c/Back panel temp sensor/temp2_input + - pmbus-i2c-5-58/Power supply 2 hotspot sensor/temp1_input + - pmbus-i2c-5-58/Power supply 2 inlet temp sensor/temp2_input + - pmbus-i2c-5-58/Power supply 2 sensor/temp3_input + + psu_skips: {} From dcf774ded06d8a4fc8468731b7e132ed30b6e8e9 Mon Sep 17 00:00:00 2001 From: Nazarii Hnydyn Date: Wed, 2 Oct 2019 12:28:49 +0300 Subject: [PATCH 111/218] Fix Asym PFC teardown: provide valid port OID. 
(#1147) Signed-off-by: Nazarii Hnydyn --- ansible/roles/test/files/saitests/pfc_asym.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/files/saitests/pfc_asym.py b/ansible/roles/test/files/saitests/pfc_asym.py index 767b405c86d..a3699b1ce2b 100644 --- a/ansible/roles/test/files/saitests/pfc_asym.py +++ b/ansible/roles/test/files/saitests/pfc_asym.py @@ -113,7 +113,7 @@ def tearDown(self): sched_prof_id = sai_thrift_create_scheduler_profile(self.client, self.RELEASE_PORT_MAX_RATE) attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(int(self.non_server_port['index']),attr) + self.client.sai_thrift_set_port_attribute(port_list[int(self.non_server_port['index'])],attr) PfcAsymBaseTest.tearDown(self) From c3a5909b8624fc88d0ffdf9345ef1a51a50547c8 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Wed, 2 Oct 2019 12:53:31 -0700 Subject: [PATCH 112/218] Warm-reboot control plane assistant test (#1124) --- ansible/roles/test/files/helpers/ferret.py | 301 +++++++++++++++++++ ansible/roles/test/files/ptftests/wr_arp.py | 308 ++++++++++++++++++++ ansible/roles/test/tasks/wr_arp.yml | 124 ++++++++ ansible/roles/test/templates/ferret.conf.j2 | 10 + ansible/roles/test/vars/testcases.yml | 6 + 5 files changed, 749 insertions(+) create mode 100644 ansible/roles/test/files/helpers/ferret.py create mode 100644 ansible/roles/test/files/ptftests/wr_arp.py create mode 100644 ansible/roles/test/tasks/wr_arp.yml create mode 100644 ansible/roles/test/templates/ferret.conf.j2 diff --git a/ansible/roles/test/files/helpers/ferret.py b/ansible/roles/test/files/helpers/ferret.py new file mode 100644 index 00000000000..2087a1238e6 --- /dev/null +++ b/ansible/roles/test/files/helpers/ferret.py @@ -0,0 +1,301 @@ +#/usr/bin/env python + +# python t.py -f /tmp/vxlan_decap.json -s 192.168.8.1 + +import SimpleHTTPServer 
+import SocketServer +import select +import shutil +import json +import BaseHTTPServer +import time +import socket +import ctypes +import struct +import binascii +import itertools +import argparse +import os + +from pprint import pprint + +from cStringIO import StringIO +from functools import partial +from collections import namedtuple + + +Record = namedtuple('Record', ['hostname', 'family', 'expired', 'lo', 'mac', 'vxlan_id']) + +class Ferret(BaseHTTPServer.BaseHTTPRequestHandler): + server_version = "FerretHTTP/0.1" + + def do_POST(self): + if not self.path.startswith('/Ferret/NeighborAdvertiser/Slices/'): + self.send_error(404, "URL is not supported") + else: + info = self.extract_info() + self.update_db(info) + self.send_resp(info) + + def extract_info(self): + c_len = int(self.headers.getheader('content-length', 0)) + body = self.rfile.read(c_len) + j = json.loads(body) + return j + + def generate_entries(self, hostname, family, expire, lo, info, mapping_family): + for i in info['vlanInterfaces']: + vxlan_id = int(i['vxlanId']) + for j in i[mapping_family]: + mac = str(j['macAddr']).replace(':', '') + addr = str(j['ipAddr']) + r = Record(hostname=hostname, family=family, expired=expire, lo=lo, mac=mac, vxlan_id=vxlan_id) + self.db[addr] = r + + return + + def update_db(self, info): + hostname = str(info['switchInfo']['name']) + lo_ipv4 = str(info['switchInfo']['ipv4Addr']) + lo_ipv6 = str(info['switchInfo']['ipv6Addr']) + duration = int(info['respondingSchemes']['durationInSec']) + expired = time.time() + duration + + self.generate_entries(hostname, 'ipv4', expired, lo_ipv4, info, 'ipv4AddrMappings') + self.generate_entries(hostname, 'ipv6', expired, lo_ipv6, info, 'ipv6AddrMappings') + + return + + def send_resp(self, info): + result = { + 'ipv4Addr': self.src_ip + } + f, l = self.generate_response(result) + self.send_response(200) + self.send_header("Content-type", "application/json") + self.send_header("Content-Length", str(l)) + 
self.send_header("Last-Modified", self.date_time_string()) + self.end_headers() + shutil.copyfileobj(f, self.wfile) + f.close() + return + + def generate_response(self, response): + f = StringIO() + json.dump(response, f) + l = f.tell() + f.seek(0) + return f, l + + +class RestAPI(object): + PORT = 85 + + def __init__(self, obj, db, src_ip): + self.httpd = SocketServer.TCPServer(("", self.PORT), obj) + self.db = db + obj.db = db + obj.src_ip = src_ip + + def handler(self): + return self.httpd.fileno() + + def handle(self): + return self.httpd.handle_request() + + +class Interface(object): + ETH_P_ALL = 0x03 + RCV_TIMEOUT = 1000 + RCV_SIZE = 4096 + SO_ATTACH_FILTER = 26 + + def __init__(self, iface, bpf_src): + self.iface = iface + self.socket = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, socket.htons(self.ETH_P_ALL)) + if bpf_src is not None: + blob = ctypes.create_string_buffer(''.join(struct.pack("HBBI", *e) for e in bpf_src)) + address = ctypes.addressof(blob) + bpf = struct.pack('HL', len(bpf_src), address) + self.socket.setsockopt(socket.SOL_SOCKET, self.SO_ATTACH_FILTER, bpf) + self.socket.bind((self.iface, 0)) + self.socket.settimeout(self.RCV_TIMEOUT) + + def __del__(self): + self.socket.close() + + def handler(self): + return self.socket.fileno() + + def recv(self): + return self.socket.recv(self.RCV_SIZE) + + def send(self, data): + self.socket.send(data) + + +class Poller(object): + def __init__(self, httpd, interfaces, responder): + self.responder = responder + self.mapping = {interface.handler(): interface for interface in interfaces} + self.httpd = httpd + + def poll(self): + handlers = self.mapping.keys() + [self.httpd.handler()] + while True: + (rdlist, _, _) = select.select(handlers, [], []) + for handler in rdlist: + if handler == self.httpd.handler(): + self.httpd.handle() + else: + self.responder.action(self.mapping[handler]) + + +class Responder(object): + ARP_PKT_LEN = 60 + ARP_OP_REQUEST = 1 + def __init__(self, db): + self.arp_chunk = 
binascii.unhexlify('08060001080006040002') # defines a part of the packet for ARP Reply + self.arp_pad = binascii.unhexlify('00' * 18) + self.db = db + + def hexdump(self, data): + print " ".join("%02x" % ord(d) for d in data) + + def action(self, interface): + data = interface.recv() + + ext_dst_mac = data[0x00:0x06] + ext_src_mac = data[0x06:0x0c] + ext_eth_type = data[0x0c:0x0e] + if ext_eth_type != binascii.unhexlify('0800'): + print "Not 0x800 eth type" + return + src_ip = data[0x001a:0x001e] + dst_ip = data[0x1e:0x22] + gre_flags = data[0x22:0x24] + gre_type = data[0x24:0x26] + # FIXME: check gre type and gre_flags + + arp_request = data[0x26:] + if len(arp_request) > self.ARP_PKT_LEN: + print "Too long packet" + return + + remote_mac, remote_ip, request_ip, op_type = self.extract_arp_info(arp_request) + # Don't send ARP response if the ARP op code is not request + if op_type != self.ARP_OP_REQUEST: + return + + request_ip_str = socket.inet_ntoa(request_ip) + + if request_ip_str not in self.db: + print "Not in db" + return + + r = self.db[request_ip_str] + if r.expired < time.time(): + print "Expired row in db" + del self.db[request_ip_str] + return + + if r.family == 'ipv4': + new_pkt = ext_src_mac + ext_dst_mac + ext_eth_type # outer eth frame + ipv4 = binascii.unhexlify('45000060977e400040110000') + dst_ip + src_ip # ip + crc = self.calculate_header_crc(ipv4) + ipv4 = ipv4[0:10] + crc + ipv4[12:] + new_pkt += ipv4 + new_pkt += binascii.unhexlify('c00012b5004c1280') # udp + new_pkt += binascii.unhexlify('08000000%06x00' % r.vxlan_id) # vxlan + + arp_reply = self.generate_arp_reply(binascii.unhexlify(r.mac), remote_mac, request_ip, remote_ip) + new_pkt += arp_reply + else: + print 'Support of family %s is not implemented' % r.family + return + + interface.send(new_pkt) + + return + + def calculate_header_crc(self, ipv4): + s = 0 + for l,r in zip(ipv4[::2], ipv4[1::2]): + l_u = struct.unpack("B", l)[0] + r_u = struct.unpack("B", r)[0] + s += (l_u << 8) + r_u 
+ + c = s >> 16 + s = s & 0xffff + + while c != 0: + s += c + c = s >> 16 + s = s & 0xffff + + s = 0xffff - s + + return binascii.unhexlify("%x" % s) + + def extract_arp_info(self, data): + # remote_mac, remote_ip, request_ip, op_type + return data[6:12], data[28:32], data[38:42], (ord(data[20]) * 256 + ord(data[21])) + + def generate_arp_reply(self, local_mac, remote_mac, local_ip, remote_ip): + eth_hdr = remote_mac + local_mac + return eth_hdr + self.arp_chunk + local_mac + local_ip + remote_mac + remote_ip + self.arp_pad + +def get_bpf_for_bgp(): + bpf_src = [ + (0x28, 0, 0, 0x0000000c), # (000) ldh [12] + (0x15, 0, 2, 0x00000800), # (001) jeq #0x800 jt 2 jf 4 + (0x30, 0, 0, 0x00000017), # (002) ldb [23] + (0x15, 6, 7, 0x0000002f), # (003) jeq #0x2f jt 10 jf 11 + (0x15, 0, 6, 0x000086dd), # (004) jeq #0x86dd jt 5 jf 11 + (0x30, 0, 0, 0x00000014), # (005) ldb [20] + (0x15, 3, 0, 0x0000002f), # (006) jeq #0x2f jt 10 jf 7 + (0x15, 0, 3, 0x0000002c), # (007) jeq #0x2c jt 8 jf 11 + (0x30, 0, 0, 0x00000036), # (008) ldb [54] + (0x15, 0, 1, 0x0000002f), # (009) jeq #0x2f jt 10 jf 11 + (0x6, 0, 0, 0x00040000), # (010) ret #262144 + (0x6, 0, 0, 0x00000000), # (011) ret #0 + ] + return bpf_src + + +def extract_iface_names(config_file): + with open(config_file) as fp: + graph = json.load(fp) + + net_ports = [] + for name, val in graph['minigraph_portchannels'].items(): + members = ['eth%d' % graph['minigraph_port_indices'][member] for member in val['members']] + net_ports.extend(members) + + return net_ports + +def parse_args(): + parser = argparse.ArgumentParser(description='Ferret VXLAN API') + parser.add_argument('-f', '--config-file', help='file with configuration', required=True) + parser.add_argument('-s', '--src-ip', help='Ferret endpoint ip', required=True) + args = parser.parse_args() + if not os.path.isfile(args.config_file): + print "Can't open config file '%s'" % args.config_file + exit(1) + + return args.config_file, args.src_ip + +def main(): + db = {} + + 
config_file, src_ip = parse_args() + iface_names = extract_iface_names(config_file) + rest = RestAPI(Ferret, db, src_ip) + bpf_src = get_bpf_for_bgp() + ifaces = [Interface(iface_name, bpf_src) for iface_name in iface_names] + responder = Responder(db) + p = Poller(rest, ifaces, responder) + p.poll() + +if __name__ == '__main__': + main() diff --git a/ansible/roles/test/files/ptftests/wr_arp.py b/ansible/roles/test/files/ptftests/wr_arp.py new file mode 100644 index 00000000000..5115c3f72f5 --- /dev/null +++ b/ansible/roles/test/files/ptftests/wr_arp.py @@ -0,0 +1,308 @@ +# This is the Control Plane Assistant test for Warm-Reboot. +# The test first starts the Ferret server, implemented in Python, then initiates the Warm-Reboot procedure. +# While the host is in Warm-Reboot, the test continuously sends ARP requests to the Vlan member ports and +# expects to receive ARP replies. The test will fail as soon as there are no replies for more than 25 seconds +# for one of the Vlan member ports +# To run the test from the command line: +# ptf --test-dir 1 1.ArpTest --platform-dir ptftests --platform remote -t "config_file='/tmp/vxlan_decap.json';ferret_ip='10.64.246.21';dut_ssh='10.3.147.243';how_long=370" +# +import time +import json +import subprocess +import datetime +import traceback +import sys +import threading +from collections import defaultdict +from pprint import pprint +from Queue import Queue + +import ptf +from ptf.base_tests import BaseTest +from ptf import config +import ptf.dataplane as dataplane +import ptf.testutils as testutils + + +class ArpTest(BaseTest): + def __init__(self): + BaseTest.__init__(self) + + log_file_name = '/root/wr_arp_test.log' + self.log_fp = open(log_file_name, 'a') + self.log_fp.write("\nNew test:\n") + + self.q_to_dut = Queue() + self.q_from_dut = Queue() + + return + + def __del__(self): + self.log_fp.close() + + return + + def log(self, message): + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "%s : %s" % (current_time,
message) + self.log_fp.write("%s : %s\n" % (current_time, message)) + + return + + def cmd(self, cmds): + process = subprocess.Popen(cmds, + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return_code = process.returncode + + return stdout, stderr, return_code + + def ssh(self, cmds): + ssh_cmds = ["ssh", "-oStrictHostKeyChecking=no", "-oServerAliveInterval=2", "admin@" + self.dut_ssh] + ssh_cmds.extend(cmds) + stdout, stderr, return_code = self.cmd(ssh_cmds) + if stdout != []: + self.log("stdout from dut: '%s'" % str(stdout)) + if stderr != []: + self.log("stderr from dut '%s'" % str(stderr)) + self.log("return code from dut: '%s'" % str(return_code)) + + if return_code == 0: + return True, str(stdout) + elif return_code == 255 and 'Timeout, server' in stderr and 'not responding' in stderr: + return True, str(stdout) + else: + return False, "return code: %d. stdout = '%s' stderr = '%s'" % (return_code, str(stdout), str(stderr)) + + def dut_thr(self, q_from, q_to): + while True: + cmd = q_from.get() + if cmd == 'WR': + self.log("Rebooting remote side") + res, res_text = self.ssh(["sudo", "warm-reboot", "-c", self.ferret_ip]) + if res: + q_to.put('ok: %s' % res_text) + else: + q_to.put('error: %s' % res_text) + elif cmd == 'uptime': + self.log("Check uptime remote side") + res, res_text = self.ssh(["uptime", "-s"]) + if res: + q_to.put('ok: %s' % res_text) + else: + q_to.put('error: %s' % res_text) + elif cmd == 'quit': + q_to.put("done") + break + else: + self.log('Unsupported cmd: %s' % cmd) + q_to.put("error: unsupported cmd: %s" % cmd) + self.log("Quiting from dut_thr") + return + + def readMacs(self): + addrs = {} + for intf in os.listdir('/sys/class/net'): + with open('/sys/class/net/%s/address' % intf) as fp: + addrs[intf] = fp.read().strip() + + return addrs + + def generate_VlanPrefixes(self, gw, prefixlen, acc_ports): + res = {} + n_hosts = 2**(32 - prefixlen) - 3 + nr_of_dataplane_ports = 
len(self.dataplane.ports) + + if nr_of_dataplane_ports > n_hosts: + raise Exception("The prefix len size is too small for the test") + + gw_addr_n = struct.unpack(">I", socket.inet_aton(gw))[0] + mask = (2**32 - 1) ^ (2**(32 - prefixlen) - 1) + net_addr_n = gw_addr_n & mask + + addr = 1 + for port in acc_ports: + while True: + host_addr_n = net_addr_n + addr + host_ip = socket.inet_ntoa(struct.pack(">I", host_addr_n)) + if host_ip != gw: + break + else: + addr += 1 # skip gw + res[port] = host_ip + addr += 1 + + return res + + def generatePkts(self, gw, port_ip, port_mac): + pkt = testutils.simple_arp_packet( + ip_snd=port_ip, + ip_tgt=gw, + eth_src=port_mac, + hw_snd=port_mac, + ) + exp_pkt = testutils.simple_arp_packet( + ip_snd=gw, + ip_tgt=port_ip, + eth_src=self.dut_mac, + eth_dst=port_mac, + hw_snd=self.dut_mac, + hw_tgt=port_mac, + arp_op=2, + ) + + return str(pkt), str(exp_pkt) + + def generatePackets(self): + self.gen_pkts = {} + for test in self.tests: + for port in test['acc_ports']: + gw = test['vlan_gw'] + port_ip = test['vlan_ip_prefixes'][port] + port_mac = self.ptf_mac_addrs['eth%d' % port] + self.gen_pkts[port] = self.generatePkts(gw, port_ip, port_mac) + + return + + def get_param(self, param_name, required=True, default = None): + params = testutils.test_params_get() + if param_name not in params: + if required: + raise Exception("required parameter '%s' is not presented" % param_name) + else: + return default + else: + return params[param_name] + + def setUp(self): + self.dataplane = ptf.dataplane_instance + + config = self.get_param('config_file') + self.ferret_ip = self.get_param('ferret_ip') + self.dut_ssh = self.get_param('dut_ssh') + self.how_long = int(self.get_param('how_long', required=False, default=300)) + + if not os.path.isfile(config): + raise Exception("the config file %s doesn't exist" % config) + + with open(config) as fp: + graph = json.load(fp) + + self.tests = [] + vni_base = 0 + for name, data in 
graph['minigraph_vlans'].items(): + test = {} + test['acc_ports'] = [graph['minigraph_port_indices'][member] for member in data['members']] + vlan_id = int(name.replace('Vlan', '')) + test['vni'] = vni_base + vlan_id + + gw = None + prefixlen = None + for d in graph['minigraph_vlan_interfaces']: + if d['attachto'] == name: + gw = d['addr'] + prefixlen = int(d['prefixlen']) + break + else: + raise Exception("Vlan '%s' is not found" % name) + + test['vlan_gw'] = gw + test['vlan_ip_prefixes'] = self.generate_VlanPrefixes(gw, prefixlen, test['acc_ports']) + + self.tests.append(test) + + self.dut_mac = graph['dut_mac'] + + self.ptf_mac_addrs = self.readMacs() + + self.generatePackets() + + self.cmd(["supervisorctl", "restart", "ferret"]) + + self.dataplane.flush() + + return + + def tearDown(self): + self.cmd(["supervisorctl", "stop", "ferret"]) + return + + def runTest(self): + print + thr = threading.Thread(target=self.dut_thr, kwargs={'q_from': self.q_to_dut, 'q_to': self.q_from_dut}) + thr.setDaemon(True) + thr.start() + + uptime_before = self.req_dut('uptime') + if uptime_before.startswith('error'): + self.log("DUT returned error for first uptime request") + self.req_dut('quit') + self.assertTrue(False, "DUT returned error for first uptime request") + + records = defaultdict(dict) + stop_at = time.time() + self.how_long + rebooted = False + while time.time() < stop_at: + for test in self.tests: + for port in test['acc_ports']: + nr_rcvd = self.testPort(port) + records[port][time.time()] = nr_rcvd + if not rebooted: + result = self.req_dut('WR') + if result.startswith('ok'): + rebooted = True + else: + self.log("Error in WR") + self.req_dut('quit') + self.assertTrue(False, "Error in WR") + + uptime_after = self.req_dut('uptime') + if uptime_after.startswith('error'): + self.log("DUT returned error for second uptime request") + self.req_dut('quit') + self.assertTrue(False, "DUT returned error for second uptime request") + + self.req_dut('quit') + + if uptime_before 
== uptime_after: + self.log("The DUT wasn't rebooted. Uptime: %s vs %s" % (uptime_before, uptime_after)) + self.assertTrue(uptime_before != uptime_after, "The DUT wasn't rebooted. Uptime: %s vs %s" % (uptime_before, uptime_after)) + + # check that every port didn't have pauses more than 25 seconds + pauses = defaultdict(list) + for port, data in records.items(): + was_active = True + last_inactive = None + for t in sorted(data.keys()): + active = data[t] > 0 + if was_active and not active: + last_inactive = t + elif not was_active and active: + pauses[port].append(t - last_inactive) + was_active = active + if not was_active: + pauses[port].append(sorted(data.keys())[-1] - last_inactive) + + m_pauses = { port:max(pauses[port]) for port in pauses.keys() if max(pauses[port]) > 25 } + for port in m_pauses.keys(): + self.log("Port eth%d. Max pause in arp_response %d sec" % (port, int(m_pauses[port]))) + print + sys.stdout.flush() + self.assertTrue(len(m_pauses) == 0, "Too long pauses in arp responses") + + return + + def testPort(self, port): + pkt, exp_pkt = self.gen_pkts[port] + testutils.send_packet(self, port, pkt) + nr_rcvd = testutils.count_matched_packets(self, exp_pkt, port, timeout=0.2) + return nr_rcvd + + def req_dut(self, cmd): + self.log("cmd: %s" % cmd) + self.q_to_dut.put(cmd) + reply = self.q_from_dut.get() + self.log("reply: %s" % reply) + return reply diff --git a/ansible/roles/test/tasks/wr_arp.yml b/ansible/roles/test/tasks/wr_arp.yml new file mode 100644 index 00000000000..e886769e69b --- /dev/null +++ b/ansible/roles/test/tasks/wr_arp.yml @@ -0,0 +1,124 @@ +- fail: msg="Please set ptf_host variable" + when: ptf_host is not defined + +- name: Remove existing ip from ptf host + script: roles/test/files/helpers/remove_ip.sh + delegate_to: "{{ ptf_host }}" + +- name: Make all mac addresses in ptf unique - should be done in vm_set + script: roles/test/files/helpers/change_mac.sh + delegate_to: "{{ ptf_host }}" + +- name: Remove old keys + file: + path: 
"{{ item }}" + state: absent + with_items: + - /root/.ssh/id_rsa + - /root/.ssh/id_rsa.pub + delegate_to: "{{ ptf_host }}" + +- name: Check that file /root/.ssh/known_hosts exists + stat: path=/root/.ssh/known_hosts + delegate_to: "{{ ptf_host }}" + register: known_hosts + +- name: Remove old entry about DUT + shell: ssh-keygen -f /root/.ssh/known_hosts -R {{ ansible_host }} + delegate_to: "{{ ptf_host }}" + when: known_hosts.stat.exists + +- name: Generate public key for ptf host + shell: ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N "" + args: + creates: /root/.ssh/id_rsa + delegate_to: "{{ ptf_host }}" + +- name: read authorized key from ptf host + fetch: + src: '/root/.ssh/id_rsa.pub' + dest: /tmp/ + flat: yes + delegate_to: "{{ ptf_host }}" + +- name: set authorized key taken from file + authorized_key: + user: "{{ ansible_ssh_user }}" + state: present + key: "{{ lookup('file', '/tmp/id_rsa.pub') }}" + +- name: Restart DUT. Wait 240 seconds after SONiC started ssh + include: reboot.yml + vars: + ready_timeout: 240 + +- name: Find appropriate VXLAN sender + shell: ip route show proto zebra type unicast | sed -e '/default/d' -ne '/0\//p' | head -n 1 | sed -ne 's/0\/.*$/1/p' + register: dip_output + +- name: Check that VXLAN DIP was found + fail: msg="We didn't find DIP ip" + when: dip_output.stdout | length == 0 + +- name: Check if the DUT has access to our ptf host + command: ip route get {{ ptf_host }} + register: route_output + +- name: Find the gateway for management port + shell: ip route show table default | sed -n 's/default //p' + register: gw_output + +- name: Check gw + fail: msg="We didn't find gw for ptf" + when: gw_output.stdout | length == 0 + +- name: Install explicit route through eth0 (mgmt) interface, when we don't have correct route to ptf + command: ip route add {{ ptf_host }}/32 {{ gw_output.stdout }} + become: yes + when: "'PortChannel' in route_output.stdout" + +- name: Update supervisor configuration + include: 
"roles/test/tasks/common_tasks/update_supervisor.yml" + vars: + supervisor_host: "{{ ptf_host }}" + +- name: Copy tests to the PTF container + copy: src=roles/test/files/ptftests dest=/root + delegate_to: "{{ ptf_host }}" + +- name: Copy Ferret to the PTF container + copy: src=roles/test/files/helpers/ferret.py dest=/opt + delegate_to: "{{ ptf_host }}" + +- name: Copy Ferret supervisor configuration to the PTF container + template: src=ferret.conf.j2 dest=/etc/supervisor/conf.d/ferret.conf + vars: + - ferret_args: '-f /tmp/vxlan_decap.json -s {{ dip_output.stdout }}' + delegate_to: "{{ ptf_host }}" + +- name: Render DUT parameters to json file for the test + template: src=vxlan_decap.json.j2 dest=/tmp/vxlan_decap.json + delegate_to: "{{ ptf_host }}" + +- name: Dump debug info. DUT ip + debug: var=ansible_eth0.ipv4.address + +- name: Dump debug info. DUT gw + debug: var=gw_output.stdout + +- name: Dump debug info. DIP + debug: var=dip_output.stdout + +- include: ptf_runner.yml + vars: + ptf_test_name: Warm-Reboot Control-Plane assist feature + ptf_test_dir: ptftests + ptf_test_path: wr_arp.ArpTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_qlen: 1000 + ptf_test_params: + - ferret_ip='{{ ptf_host }}' + - dut_ssh='{{ ansible_eth0.ipv4.address }}' + - config_file='/tmp/vxlan_decap.json' + - how_long=370 diff --git a/ansible/roles/test/templates/ferret.conf.j2 b/ansible/roles/test/templates/ferret.conf.j2 new file mode 100644 index 00000000000..485153c819f --- /dev/null +++ b/ansible/roles/test/templates/ferret.conf.j2 @@ -0,0 +1,10 @@ +[program:ferret] +command=/usr/bin/python /opt/ferret.py {{ ferret_args }} +process_name=ferret +stdout_logfile=/tmp/ferret.out.log +stderr_logfile=/tmp/ferret.err.log +redirect_stderr=false +autostart=false +autorestart=true +startsecs=1 +numprocs=1 diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 69d0723b6ae..82ae0cb1e44 100644 --- a/ansible/roles/test/vars/testcases.yml 
+++ b/ansible/roles/test/vars/testcases.yml @@ -249,6 +249,12 @@ testcases: required_vars: ptf_host: + wr_arp: + filename: wr_arp.yml + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116] + required_vars: + ptf_host: + pfc_asym: filename: pfc_asym.yml topologies: [t0] From 7999ce03c5733029a922046f05b76e9ccae7f21f Mon Sep 17 00:00:00 2001 From: Sumukha Tumkur Vani Date: Thu, 3 Oct 2019 11:50:57 -0700 Subject: [PATCH 113/218] M0 testcases (#1144) * Add topology to NOTOPO testcases * Add topology to testcases --- ansible/roles/test/vars/testcases.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 82ae0cb1e44..e1f084eee35 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -29,7 +29,7 @@ testcases: bgp_speaker: filename: bgp_speaker.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116] required_vars: ptf_host: testbed_type: @@ -40,7 +40,7 @@ testcases: continuous_reboot: filename: continuous_reboot.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] copp: filename: copp.yml @@ -154,7 +154,7 @@ testcases: mem_check: filename: mem_check.yml - topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] mtu: filename: mtu.yml @@ -175,7 +175,7 @@ testcases: ntp: filename: ntp.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] pfc_wd: filename: pfc_wd.yml @@ -191,7 +191,7 @@ testcases: reboot: filename: reboot.yml - topologies: [t0, t0-56, t0-64, t0-64-32, 
t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] repeat_harness: filename: repeat_harness.yml @@ -211,15 +211,15 @@ testcases: sensors: filename: sensors_check.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] service_acl: filename: service_acl.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] snmp: filename: snmp.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] syslog: filename: syslog.yml From 1b9a128feb165abeb4420c4e08a5336c611cb415 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Fri, 4 Oct 2019 09:45:01 -0700 Subject: [PATCH 114/218] Test case 1 of PFC watchdog against warm-reboot: happy path (#825) * First test case of PFC watchdog against warm-reboot Signed-off-by: Wenda Ni * Add more comments for code readability Signed-off-by: Wenda Ni * Use boolean variable to determine the test run type: regular pfc wd test or pfcwd warm-reboot test Signed-off-by: Wenda Ni * Feed reboot type to reboot_sonic.yml in warm-reboot happy path test Signed-off-by: Wenda Ni --- ansible/roles/test/tasks/pfc_wd.yml | 84 ++++++---- .../functional_test_restore.yml | 142 ++++++++++++++++ .../functional_test/functional_test_storm.yml | 158 ++++++++++++++++++ .../functional_test_warm_reboot.yml | 25 +++ 4 files changed, 373 insertions(+), 36 deletions(-) create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml create mode 100644 
ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml diff --git a/ansible/roles/test/tasks/pfc_wd.yml b/ansible/roles/test/tasks/pfc_wd.yml index dc1657ad3a4..896a8e6a0bc 100644 --- a/ansible/roles/test/tasks/pfc_wd.yml +++ b/ansible/roles/test/tasks/pfc_wd.yml @@ -105,10 +105,15 @@ - set_fact: select_test_ports: "{{select_test_ports | default({}) | combine({item.key: item.value})}}" with_dict: "{{test_ports}}" - when: item.value.test_port_id | int % 15 == seed | int % 15 + when: (item.value.test_port_id | int % 15) == (seed | int % 15) - debug: msg="{{select_test_ports}}" +- name: Run default test if user has not specified warm reboot test + set_fact: + warm_reboot_test: false + when: warm_reboot_test is not defined + #****************************************# # Start tests # #****************************************# @@ -124,41 +129,48 @@ minigraph_vlan_interfaces: [] when: minigraph_vlan_interfaces is undefined - - name: Test PFC WD configuration validation. - vars: - pfc_wd_template: roles/test/templates/pfc_wd_config.j2 - include: roles/test/tasks/pfc_wd/config_test/config_test.yml - - - name: Test PFC WD Functional tests. - include: roles/test/tasks/pfc_wd/functional_test/functional_test.yml - with_dict: "{{select_test_ports}}" - - - name: Test PFC WD Timer accuracy. - include: roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml - - - name: Test PFC WD extreme case when all ports have storm - include: roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml - - - name: Set vlan members - set_fact: - vlan_members: "{{ minigraph_vlans[minigraph_vlan_interfaces[0]['attachto']]['members']}}" - when: - - pfc_asym is defined - - - name: Enable asymmetric PFC on all server interfaces - command: config interface pfc asymmetric on {{ item }} - become: yes - with_items: "{{ vlan_members }}" - when: - - pfc_asym is defined - - testbed_type in ['t0'] - - - name: Test PFC WD Functional tests. 
- include: roles/test/tasks/pfc_wd/functional_test/functional_test.yml - with_dict: "{{select_test_ports}}" - when: - - pfc_asym is defined - - testbed_type in ['t0'] + - block: + - name: Test PFC WD configuration validation. + vars: + pfc_wd_template: roles/test/templates/pfc_wd_config.j2 + include: roles/test/tasks/pfc_wd/config_test/config_test.yml + + - name: Test PFC WD Functional tests. + include: roles/test/tasks/pfc_wd/functional_test/functional_test.yml + with_dict: "{{select_test_ports}}" + + - name: Test PFC WD Timer accuracy. + include: roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml + + - name: Test PFC WD extreme case when all ports have storm + include: roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml + + - name: Set vlan members + set_fact: + vlan_members: "{{ minigraph_vlans[minigraph_vlan_interfaces[0]['attachto']]['members']}}" + when: + - pfc_asym is defined + + - name: Enable asymmetric PFC on all server interfaces + command: config interface pfc asymmetric on {{ item }} + become: yes + with_items: "{{ vlan_members }}" + when: + - pfc_asym is defined + - testbed_type in ['t0'] + + - name: Test PFC WD Functional tests. + include: roles/test/tasks/pfc_wd/functional_test/functional_test.yml + with_dict: "{{select_test_ports}}" + when: + - pfc_asym is defined + - testbed_type in ['t0'] + when: warm_reboot_test | bool == false + + - block: + - name: Test PFC WD function against warm reboot + include: roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml + when: warm_reboot_test | bool == true always: - name: General cleanup. 
diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml new file mode 100644 index 00000000000..f50a9961ccf --- /dev/null +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml @@ -0,0 +1,142 @@ +#------------------------------------ +# Test the PFC restore action +# 1. Verify restore is detected via syslog entry analysis on the dut +# 2. Verity the forward action on both ingress and egress via ptf dataplane traffic test +#------------------------------------ + +- name: Set fact for item values + set_fact: + peer_device: "{{item.value.peer_device}}" + pfc_wd_test_port: "{{item.key}}" + pfc_wd_rx_port: "{{item.value.rx_port}}" + pfc_wd_test_neighbor_addr: "{{item.value.test_neighbor_addr}}" + pfc_wd_rx_neighbor_addr: "{{item.value.rx_neighbor_addr}}" + pfc_wd_test_port_id: "{{item.value.test_port_id}}" + pfc_wd_rx_port_id: "{{item.value.rx_port_id}}" + port_type: "{{item.value.test_port_type}}" + +- name: Set fact for test port ids and include portchannel other members if exists + set_fact: + pfc_wd_test_port_ids: "[{{item.value.test_portchannel_members | join(' ')}}]" + when: port_type == "portchannel" + +- set_fact: + pfc_wd_test_port_ids: "[{{pfc_wd_test_port_id}}]" + when: port_type in ["vlan", "interface"] + +- name: Remove existing IPs from PTF host + script: roles/test/files/helpers/remove_ip.sh + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Set {{pfc_wd_test_neighbor_addr}} to eth{{pfc_wd_test_port_id}} on PTF host" + shell: ifconfig eth{{pfc_wd_test_port_id}} {{pfc_wd_test_neighbor_addr}} + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Update ARP entry on DUT" + shell: ping {{minigraph_vlan_interfaces[0]['addr']}} -c 10 + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Update ARP entry on DUT" + shell: docker exec -i swss arping {{pfc_wd_test_neighbor_addr}} 
-c 5 + when: port_type == "vlan" + +- conn_graph_facts: host={{ peer_device }} + connection: local + become: no + +- name: Prepare variables required for PFC test + set_fact: + pfc_queue_index: 4 + pfc_frames_number: 100000000 + pfc_wd_test_pkt_count: 100 + pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" + peer_hwsku: "{{device_info['HwSku']}}" + peer_mgmt: "{{device_info['mgmtip']}}" + testname: functional_test + +- set_fact: + class_enable: "{{(1).__lshift__(pfc_queue_index)}}" + +- set_fact: + peer_login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + +- name: set pfc storm templates based on fanout platform sku + include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml + + +- block: + # 1. Verify restore is detected via syslog entry analysis on the dut + - set_fact: + test_expect_file: "expect_pfc_wd_restore" + + - name: Initialize loganalyzer + include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + + - name: Wait for queue to recover from PFC storm + pause: + seconds: 1 + + - name: Check if logs contain message that PFC WD restored from deadlock + include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + + - name: Check if logs contain message that PFC WD restored from deadlock + include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + # 2. 
Verity the forward action on both ingress and egress via ptf dataplane traffic test + - name: "Send packets via {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_rx_port_id[0]}}' + - port_dst='{{pfc_wd_test_port_ids}}' + - ip_dst='{{pfc_wd_test_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='forward' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + - name: "Send packets to {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_test_port_id}}' + - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' + - ip_dst='{{pfc_wd_rx_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='forward' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + rescue: + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml new file mode 100644 index 00000000000..bf0e9015240 --- /dev/null +++ 
b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml @@ -0,0 +1,158 @@ +#------------------------------------ +# Test the PFC storm action +# 1. Verify storm is detected via syslog entry analysis on the dut +# 2. Verify the drop action in place on both ingress and egress via ptf dataplane traffic test +# +# Storm will not be stopped on exiting the yml if test success +#------------------------------------ + +- name: Set fact for item values + set_fact: + peer_device: "{{item.value.peer_device}}" + pfc_wd_test_port: "{{item.key}}" + pfc_wd_rx_port: "{{item.value.rx_port}}" + pfc_wd_test_neighbor_addr: "{{item.value.test_neighbor_addr}}" + pfc_wd_rx_neighbor_addr: "{{item.value.rx_neighbor_addr}}" + pfc_wd_test_port_id: "{{item.value.test_port_id}}" + pfc_wd_rx_port_id: "{{item.value.rx_port_id}}" + port_type: "{{item.value.test_port_type}}" + +- name: Set fact for test port ids and include portchannel other members if exists + set_fact: + pfc_wd_test_port_ids: "[{{item.value.test_portchannel_members | join(' ')}}]" + when: port_type == "portchannel" + +- set_fact: + pfc_wd_test_port_ids: "[{{pfc_wd_test_port_id}}]" + when: port_type in ["vlan", "interface"] + +- name: Remove existing IPs from PTF host + script: roles/test/files/helpers/remove_ip.sh + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Set {{pfc_wd_test_neighbor_addr}} to eth{{pfc_wd_test_port_id}} on PTF host" + shell: ifconfig eth{{pfc_wd_test_port_id}} {{pfc_wd_test_neighbor_addr}} + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Update ARP entry on DUT" + shell: ping {{minigraph_vlan_interfaces[0]['addr']}} -c 10 + delegate_to: "{{ptf_host}}" + when: port_type == "vlan" + +- name: "Update ARP entry on DUT" + shell: docker exec -i swss arping {{pfc_wd_test_neighbor_addr}} -c 5 + when: port_type == "vlan" + +- conn_graph_facts: host={{ peer_device }} + connection: local + become: no + +- name: Prepare variables required for PFC test + 
set_fact: + pfc_queue_index: 4 + pfc_frames_number: 100000000 + pfc_wd_test_pkt_count: 100 + pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" + peer_hwsku: "{{device_info['HwSku']}}" + peer_mgmt: "{{device_info['mgmtip']}}" + testname: functional_test + +- set_fact: + class_enable: "{{(1).__lshift__(pfc_queue_index)}}" + +- set_fact: + peer_login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + +- name: set pfc storm templates based on fanout platform sku + include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml + + +- block: + - set_fact: + pfc_gen_file: pfc_gen.py + + - name: Deploy pfc packet generater file to fanout switch + include: roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml + + - name: copy the test to ptf container + copy: src=roles/test/files/ptftests dest=/root + delegate_to: "{{ptf_host}}" + + # 1. Verify storm is detected via syslog entry analysis on the dut + - set_fact: + test_expect_file: "expect_pfc_wd_detect" + test_ignore_file: "ignore_pfc_wd_messages" + + - name: Initialize loganalyzer + include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + + - name: Generate PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + register: pid + + - debug: msg="{{pid}}" + + - name: Let PFC storm happen for a while + pause: + seconds: 5 + + - name: Check if logs contain message that PFC WD detected storm + include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + + - name: Check if logs contain message that PFC WD detected storm + include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + # 2. 
Verify the drop action in place on both ingress and egress via ptf dataplane traffic test + - name: "check egress drop, tx port {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_rx_port_id[0]}}' + - port_dst='[{{pfc_wd_test_port_id}}]' + - ip_dst='{{pfc_wd_test_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='drop' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + - name: "Check ingress drop, rx port {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_test_port_id}}' + - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' + - ip_dst='{{pfc_wd_rx_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='drop' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + rescue: + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml new file mode 100644 index 00000000000..644a9add423 --- 
/dev/null +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml @@ -0,0 +1,25 @@ +# Test case 1: +# PFC storm detected and restored before warm-reboot +# no on-going storm before warm-reboot emission +# PFC storm detect and restore functional after warm-reboot +- block: + - name: Test PFC WD storm function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml + with_dict: "{{select_test_ports}}" + + - name: Test PFC WD restore function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml + with_dict: "{{select_test_ports}}" + + - name: Issue warm reboot on the device + include: roles/test/tasks/common_tasks/reboot_sonic.yml + vars: + reboot_type: warm-reboot + + - name: Test PFC WD storm function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml + with_dict: "{{select_test_ports}}" + + - name: Test PFC WD restore function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml + with_dict: "{{select_test_ports}}" From 5a29d6977c8df8049baa214e4416771c80def4cb Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Fri, 4 Oct 2019 14:12:15 -0700 Subject: [PATCH 115/218] Let storm all test run on queue index 3 (#1133) Signed-off-by: Wenda Ni --- .../roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml index a74ed80d5f3..fcbfcbad534 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml @@ -1,7 +1,7 @@ # pfc_frames_number intends to be large enough so that PFC storm keeps happenning until runs pfc_storm_stop command. 
- name: Prepare variables required for PFC test set_fact: - pfc_queue_index: 4 + pfc_queue_index: 3 pfc_frames_number: 10000000 pfc_wd_detect_time: 200 pfc_wd_restore_time: 200 From fe9900dc88598aebe1ba79594bff89515f880cdb Mon Sep 17 00:00:00 2001 From: Kebo Liu Date: Wed, 9 Oct 2019 00:27:09 +0800 Subject: [PATCH 116/218] [pytest]porting back changes from master to 201811 (#1145) * Porting back pytest change from master to 201811 update device info to add more facts add log analyzer add check daemon status test add check interface status test add Mellanox check sfp presence test update reboot, config reload and sequential restart test update sfp test update check sysfs test update platform fixture * fix review comments Rebase to add some new master PR: https://github.com/Azure/sonic-mgmt/pull/1130 [platform] Fix the reboot SONiC stuck issue https://github.com/Azure/sonic-mgmt/pull/1120 [platform] Disable log analyzer for the reload and restart cases https://github.com/Azure/sonic-mgmt/pull/1125 [pytest] Fix pytest conftest.py issue * update loganalyzer ignore log * [tests/platform/mellanox] check PSU state against sysfs on Mellanox devices (#1082) * [psu test case] check psu state against vendor specific info. for mellanox, check sysfs * [test_platform_info.py]handle "NOT PRESENT" in test_show_platform_psustatus * [psu testcase] address comments Conflicts: tests/platform/mellanox/check_sysfs.py * [check_sysfs] rewords. 
* [check_sysfs.py] rewording Conflicts: tests/platform/mellanox/check_sysfs.py * reduce redundant code and rename function * remove redundant code --- tests/common/devices.py | 45 +- tests/common/mellanox_data.py | 83 ++- tests/conftest.py | 36 +- tests/loganalyzer/README.md | 135 ++++ tests/loganalyzer/__init__.py | 1 + tests/loganalyzer/loganalyzer.py | 232 ++++++ .../loganalyzer/loganalyzer_common_expect.txt | 0 .../loganalyzer/loganalyzer_common_ignore.txt | 2 + .../loganalyzer/loganalyzer_common_match.txt | 6 + tests/loganalyzer/system_msg_handler.py | 665 ++++++++++++++++++ tests/platform/check_all_interface_info.py | 19 + tests/platform/check_daemon_status.py | 32 + tests/platform/check_interface_status.py | 32 +- tests/platform/conftest.py | 0 tests/platform/mellanox/check_sysfs.py | 132 +++- tests/platform/mellanox/conftest.py | 0 .../mellanox/test_check_sfp_presence.py | 35 + tests/platform/mellanox/test_check_sysfs.py | 1 + tests/platform/platform_fixtures.py | 1 + tests/platform/test_platform_info.py | 25 +- tests/platform/test_reboot.py | 200 +++++- tests/platform/test_reload_config.py | 11 +- tests/platform/test_sequential_restart.py | 12 +- tests/platform/test_sfp.py | 16 +- 24 files changed, 1605 insertions(+), 116 deletions(-) create mode 100644 tests/loganalyzer/README.md create mode 100644 tests/loganalyzer/__init__.py create mode 100644 tests/loganalyzer/loganalyzer.py create mode 100644 tests/loganalyzer/loganalyzer_common_expect.txt create mode 100644 tests/loganalyzer/loganalyzer_common_ignore.txt create mode 100644 tests/loganalyzer/loganalyzer_common_match.txt create mode 100644 tests/loganalyzer/system_msg_handler.py create mode 100644 tests/platform/check_all_interface_info.py create mode 100644 tests/platform/check_daemon_status.py create mode 100644 tests/platform/conftest.py create mode 100644 tests/platform/mellanox/conftest.py create mode 100644 tests/platform/mellanox/test_check_sfp_presence.py diff --git a/tests/common/devices.py 
b/tests/common/devices.py index 9aac450cbb0..7ee8bf0e7ad 100644 --- a/tests/common/devices.py +++ b/tests/common/devices.py @@ -9,11 +9,13 @@ """ import json import logging -from multiprocessing import Process, Queue +import os +from multiprocessing.pool import ThreadPool from errors import RunAnsibleModuleFail from errors import UnsupportedAnsibleModule + class AnsibleHostBase(object): """ @summary: The base class for various objects. @@ -44,13 +46,11 @@ def _run(self, *module_args, **complex_args): module_async = complex_args.pop('module_async', False) if module_async: - q = Queue() - def run_module(queue, module_args, complex_args): - res = self.module(*module_args, **complex_args) - q.put(res[self.hostname]) - p = Process(target=run_module, args=(q, module_args, complex_args)) - p.start() - return p, q + def run_module(module_args, complex_args): + return self.module(*module_args, **complex_args)[self.hostname] + pool = ThreadPool() + result = pool.apply_async(run_module, (module_args, complex_args)) + return pool, result res = self.module(*module_args, **complex_args)[self.hostname] if res.is_failed and not module_ignore_errors: @@ -149,7 +149,8 @@ def is_service_fully_started(self, service): return True else: return False - except: + except Exception as e: + logging.error("Failed to get service status, exception: %s" % repr(e)) return False def critical_services_fully_started(self): @@ -163,7 +164,6 @@ def critical_services_fully_started(self): logging.debug("Status of critical services: %s" % str(result)) return all(result.values()) - def get_crm_resources(self): """ @summary: Run the "crm show resources all" command and parse its output @@ -185,11 +185,32 @@ def get_crm_resources(self): fields = line.split() if len(fields) == 5: result["acl_resources"].append({"stage": fields[0], "bind_point": fields[1], - "resource_name": fields[2], "used_count": int(fields[3]), "available_count": int(fields[4])}) + "resource_name": fields[2], "used_count": int(fields[3]), 
+ "available_count": int(fields[4])}) if current_table == 3: # content of the third table, table resources fields = line.split() if len(fields) == 4: result["table_resources"].append({"table_id": fields[0], "resource_name": fields[1], - "used_count": int(fields[2]), "available_count": int(fields[3])}) + "used_count": int(fields[2]), "available_count": int(fields[3])}) return result + + def get_pmon_daemon_list(self): + """ + @summary: in 201811 use different way to get the pmon daemon list since + config file (/usr/share/sonic/device/{platform}/{hwsku}/pmon_daemon_control.json) is not available. + Check the availability of two plugins led_control.py and sfputil.py, they are for ledd and xcvrd. + If one of them does not exist, then the related daemon is not expected to be running on this platform. + """ + daemon_list = [] + + led_plugin_path = os.path.join('/usr/share/sonic/device', self.facts["platform"], 'plugins/led_control.py') + sfp_plugin_path = os.path.join('/usr/share/sonic/device', self.facts["platform"], 'plugins/sfputil.py') + + if os.path.isfile(led_plugin_path): + daemon_list.append('ledd') + if os.path.isfile(sfp_plugin_path): + daemon_list.append('xcvrd') + + logging.info("Pmon daemon list for this platform is %s" % str(daemon_list)) + return daemon_list diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index 7ef9aa424bd..65b25cabe9a 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -17,6 +17,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 } }, "ACS-MSN2740": { @@ -32,6 +41,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 32 } }, "ACS-MSN2410": { @@ -47,6 +65,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 2 + }, + 
"ports": { + "number": 56 } }, "ACS-MSN2010": { @@ -62,6 +89,15 @@ "psus": { "number": 2, "hot_swappable": False + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 22 } }, "ACS-MSN2100": { @@ -77,6 +113,15 @@ "psus": { "number": 2, "hot_swappable": False + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 16 } }, "ACS-MSN3800": { @@ -92,6 +137,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 64 } }, "ACS-MSN3700": { @@ -107,6 +161,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 32 } }, "ACS-MSN3700C": { @@ -122,21 +185,15 @@ "psus": { "number": 2, "hot_swappable": True - } - }, - "ACS-MSN3510": { - "reboot": { - "cold_reboot": True, - "fast_reboot": True, - "warm_reboot": False }, - "fans": { - "number": 6, - "hot_swappable": True + "cpu_pack": { + "number": 1 }, - "psus": { - "number": 2, - "hot_swappable": True + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 } } } diff --git a/tests/conftest.py b/tests/conftest.py index 2701fdf06fd..7288a74b823 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,14 +7,10 @@ import ipaddr as ipaddress from ansible_host import AnsibleHost +from loganalyzer import LogAnalyzer pytest_plugins = ('ptf_fixtures', 'ansible_fixtures') -# Add the tests folder to sys.path, for importing the lib package -_current_file_dir = os.path.dirname(os.path.realpath(__file__)) -if _current_file_dir not in sys.path: - sys.path.append(current_file_dir) - class TestbedInfo(object): """ @@ -35,7 +31,6 @@ def __init__(self, testbed_file): name = '' for key in line: if ('uniq-name' in key or 'conf-name' in key) and '#' in line[key]: - ### skip comment line continue elif 'uniq-name' in key or 'conf-name' in key: name = line[key] @@ 
-52,6 +47,8 @@ def __init__(self, testbed_file): def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") parser.addoption("--testbed_file", action="store", default=None, help="testbed file name") + parser.addoption("--disable_loganalyzer", action="store_true", default=False, + help="disable loganalyzer analysis for 'loganalyzer' fixture") @pytest.fixture(scope="session") @@ -78,11 +75,12 @@ def testbed_devices(ansible_adhoc, testbed): @param testbed: Fixture for parsing testbed configuration file. @return: Return the created device objects in a dictionary """ - from common.devices import SonicHost, Localhost + from common.devices import SonicHost, Localhost, PTFHost + + devices = { + "localhost": Localhost(ansible_adhoc), + "dut": SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True)} - devices = {} - devices["localhost"] = Localhost(ansible_adhoc) - devices["dut"] = SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True) if "ptf" in testbed: devices["ptf"] = PTFHost(ansible_adhoc, testbed["ptf"]) @@ -121,3 +119,21 @@ def eos(): with open('eos/eos.yml') as stream: eos = yaml.safe_load(stream) return eos + + +@pytest.fixture(autouse=True) +def loganalyzer(duthost, request): + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=request.node.name) + # Add start marker into DUT syslog + marker = loganalyzer.init() + yield loganalyzer + if not request.config.getoption("--disable_loganalyzer") and "disable_loganalyzer" not in request.keywords: + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Parse syslog and process result. 
Raise "LogAnalyzerError" exception if: total match or expected missing + # match is not equal to zero + loganalyzer.analyze(marker) + else: + # Add end marker into DUT syslog + loganalyzer._add_end_marker(marker) + diff --git a/tests/loganalyzer/README.md b/tests/loganalyzer/README.md new file mode 100644 index 00000000000..4b6bfe7d9f4 --- /dev/null +++ b/tests/loganalyzer/README.md @@ -0,0 +1,135 @@ +#### Loganalyzer API usage example + +Below is described possibility of loganalyzer fixture/module usage. + +##### Loganalyzer fixture +In the root conftest there is implemented "loganalyzer" pytest fixture, which starts automatically for all test cases. +Fixture main flow: +- loganalyzer will add start marker before test case start +- loganalyzer will add stop marker after test case finish +- if loganalyzer analysis is not disabled for current test case it will analyze DUT syslog and display results. +If loganalyzer find specified messages which corresponds to defined regular expressions, it will display found messages and pytest will generate 'error'. + +#### To skip loganalyzer analysis for: +- all test cases - use pytest command line option ```--disable_loganalyzer``` +- specific test case: mark test case with ```@pytest.mark.disable_loganalyzer``` decorator. Example is shown below. + + +#### Notes: +loganalyzer.init() - can be called several times without calling "loganalyzer.analyze(marker)" between calls. Each call return its unique marker, which is used for "analyze" phase - loganalyzer.analyze(marker). + + +### Loganalyzer usage example + +#### Example calling loganalyzer init/analyze methods automatically by using with statement +```python + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Analyze syslog for code executed inside with statement + with loganalyzer as analyzer: + logging.debug("============== Test steps ===================") + # Add test code here ... 
+ time.sleep(1) + + # Separately analyze syslog for code executed inside each with statement + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Load regular expressions from the specified file + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Set match criteria (regular expression) to custom regexp - "test:.*Error" + loganalyzer.match_regex.extend(["test:.*Error"]) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) +``` + +#### Example calling loganalyzer init/analyze methods directly in test case +```python + # Example 1 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 2 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... 
+ # Get summary of analyzed DUT syslog + result = loganalyzer.analyze(marker, fail=False) + # Verify that specific amount of error messages found in syslog # Negative test case + assert result["total"]["match"] == 2, "Not found expected errors: {}".format(result) + + # Example 3 + # Download extracted syslog file from DUT to the local host + loganalyzer.save_extracted_log(dest="/tmp/log/syslog") + + # Example 4 + # Update previously configured marker + # Now start marker will have new prefix - test_bgp + loganalyzer.update_marker_prefix("test_bgp") + + def get_platform_info(dut): + """ + Example callback which gets DUT platform information and returns obtained string + """ + return dut.command("show platform summary") + + # Example 5 + # Execute specific function and analyze logs during function execution + run_cmd_result = loganalyzer.run_cmd(get_platform_info, ans_host) + # Process result of "get_platform_info" callback + assert all(item in run_cmd_result["stdout"] for item in ["Platform", "HwSKU", "ASIC"]) is True, "Unexpected output returned after command execution: {}".format(run_cmd_result) + + # Example 6 + # Clear current regexp match list + loganalyzer.match_regex = [] + # Load regular expressions from the specified file defined in COMMON_MATCH variable + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. 
Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 7 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.analyze(marker) + + # Example 8 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. 
Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.run_cmd(ans_host.command, "echo '---------- kernel: says Oops --------------' >> /var/log/syslog") +``` diff --git a/tests/loganalyzer/__init__.py b/tests/loganalyzer/__init__.py new file mode 100644 index 00000000000..e32a3567489 --- /dev/null +++ b/tests/loganalyzer/__init__.py @@ -0,0 +1 @@ +from .loganalyzer import LogAnalyzer, COMMON_MATCH, COMMON_IGNORE, COMMON_EXPECT, LogAnalyzerError diff --git a/tests/loganalyzer/loganalyzer.py b/tests/loganalyzer/loganalyzer.py new file mode 100644 index 00000000000..85d31fbde75 --- /dev/null +++ b/tests/loganalyzer/loganalyzer.py @@ -0,0 +1,232 @@ +import sys +import logging +import os +import re +import time +import pprint +import system_msg_handler + +from system_msg_handler import AnsibleLogAnalyzer as ansible_loganalyzer +from os.path import join, split +from os.path import normpath + +ANSIBLE_LOGANALYZER_MODULE = system_msg_handler.__file__.replace(r".pyc", ".py") +COMMON_MATCH = join(split(__file__)[0], "loganalyzer_common_match.txt") +COMMON_IGNORE = join(split(__file__)[0], "loganalyzer_common_ignore.txt") +COMMON_EXPECT = join(split(__file__)[0], "loganalyzer_common_expect.txt") +SYSLOG_TMP_FOLDER = "/tmp/pytest-run/syslog" + + +class LogAnalyzerError(Exception): + """Raised when loganalyzer found matches during analysis phase.""" + def __repr__(self): + return pprint.pformat(self.message) + + +class LogAnalyzer: + def __init__(self, ansible_host, marker_prefix, dut_run_dir="/tmp"): + self.ansible_host = ansible_host + self.dut_run_dir = dut_run_dir + self.extracted_syslog = os.path.join(self.dut_run_dir, "syslog") + self.marker_prefix = marker_prefix + self.ansible_loganalyzer = ansible_loganalyzer(self.marker_prefix, False) + + self.match_regex = [] + self.expect_regex = [] + self.ignore_regex = [] + self._markers = [] + + def _add_end_marker(self, marker): + """ + @summary: Add 
stop marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + cmd = "python {run_dir}/loganalyzer.py --action add_end_marker --run_id {marker}".format(run_dir=self.dut_run_dir, marker=marker) + + logging.debug("Adding end marker '{}'".format(marker)) + self.ansible_host.command(cmd) + + def __enter__(self): + """ + Store start markers which are used in analyze phase. + """ + self._markers.append(self.init()) + + def __exit__(self, *args): + """ + Analyze syslog messages. + """ + self.analyze(self._markers.pop()) + + def _verify_log(self, result): + """ + Verify that total match and expected missing match equals to zero or raise exception otherwise. + Verify that expected_match is not equal to zero when there is configured expected regexp in self.expect_regex list + """ + if not result: + raise LogAnalyzerError("Log analyzer failed - no result.") + if result["total"]["match"] != 0 or result["total"]["expected_missing_match"] != 0: + raise LogAnalyzerError(result) + + # Check for negative case + if self.expect_regex and result["total"]["expected_match"] == 0: + raise LogAnalyzerError(result) + + def update_marker_prefix(self, marker_prefix): + """ + @summary: Update configured marker prefix + """ + self.marker_prefix = marker_prefix + + def load_common_config(self): + """ + @summary: Load regular expressions from common files, which are localted in folder with legacy loganalyzer. + Loaded regular expressions are used by "analyze" method to match expected text in the downloaded log file. 
+ """ + self.match_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_MATCH])[1] + self.ignore_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_IGNORE])[1] + self.expect_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_EXPECT])[1] + + def parse_regexp_file(self, src): + """ + @summary: Get regular expressions defined in src file. + """ + return self.ansible_loganalyzer.create_msg_regex([src])[1] + + def run_cmd(self, callback, *args, **kwargs): + """ + @summary: Initialize loganalyzer, execute function and analyze syslog. + + @param callback: Python callable or function to be executed. + @param args: Input arguments for callback function. + @param kwargs: Input key value arguments for callback function. + + @return: Callback execution result + """ + marker = self.init() + try: + call_result = callback(*args, **kwargs) + except Exception as err: + logging.error("Error during callback execution:\n{}".format(err)) + logging.debug("Log analysis result\n".format(self.analyze(marker))) + raise err + self.analyze(marker) + + return call_result + + def init(self): + """ + @summary: Add start marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + logging.debug("Loganalyzer init") + + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + start_marker = ".".join((self.marker_prefix, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + cmd = "python {run_dir}/loganalyzer.py --action init --run_id {start_marker}".format(run_dir=self.dut_run_dir, start_marker=start_marker) + + logging.debug("Adding start marker '{}'".format(start_marker)) + self.ansible_host.command(cmd) + return start_marker + + def analyze(self, marker, fail=True): + """ + @summary: Extract syslog logs based on the start/stop markers and compose one file. Download composed file, analyze file based on defined regular expressions. + + @param marker: Marker obtained from "init" method. 
+ @param fail: Flag to enable/disable raising exception when loganalyzer find error messages. + + @return: If "fail" is False - return dictionary of parsed syslog summary, if dictionary can't be parsed - return empty dictionary. If "fail" is True and if found match messages - raise exception. + """ + logging.debug("Loganalyzer analyze") + analyzer_summary = {"total": {"match": 0, "expected_match": 0, "expected_missing_match": 0}, + "match_files": {}, + "match_messages": {}, + "expect_messages": {}, + "unused_expected_regexp": [] + } + tmp_folder = ".".join((SYSLOG_TMP_FOLDER, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + self.ansible_loganalyzer.run_id = marker + + # Add end marker into DUT syslog + self._add_end_marker(marker) + + try: + # Disable logrotate cron task + self.ansible_host.command("sed -i 's/^/#/g' /etc/cron.d/logrotate") + + logging.debug("Waiting for logrotate from previous cron task run to finish") + # Wait for logrotate from previous cron task run to finish + end = time.time() + 60 + while time.time() < end: + # Verify for exception because self.ansible_host automatically handle command return codes and raise exception for none zero code + try: + self.ansible_host.command("pgrep -f logrotate") + except Exception: + break + else: + time.sleep(5) + continue + else: + logging.error("Logrotate from previous task was not finished during 60 seconds") + + # On DUT extract syslog files from /var/log/ and create one file by location - /tmp/syslog + self.ansible_host.extract_log(directory='/var/log', file_prefix='syslog', start_string='start-LogAnalyzer-{}'.format(marker), target_filename=self.extracted_syslog) + finally: + # Enable logrotate cron task back + self.ansible_host.command("sed -i 's/^#//g' /etc/cron.d/logrotate") + + # Download extracted logs from the DUT to the temporal folder defined in SYSLOG_TMP_FOLDER + self.save_extracted_log(dest=tmp_folder) + + match_messages_regex = re.compile('|'.join(self.match_regex)) if 
len(self.match_regex) else None + ignore_messages_regex = re.compile('|'.join(self.ignore_regex)) if len(self.ignore_regex) else None + expect_messages_regex = re.compile('|'.join(self.expect_regex)) if len(self.expect_regex) else None + + analyzer_parse_result = self.ansible_loganalyzer.analyze_file_list([tmp_folder], match_messages_regex, ignore_messages_regex, expect_messages_regex) + # Print syslog file content and remove the file + with open(tmp_folder) as fo: + logging.debug("Syslog content:\n\n{}".format(fo.read())) + os.remove(tmp_folder) + + total_match_cnt = 0 + total_expect_cnt = 0 + expected_lines_total = [] + unused_regex_messages = [] + + for key, value in analyzer_parse_result.iteritems(): + matching_lines, expecting_lines = value + analyzer_summary["total"]["match"] += len(matching_lines) + analyzer_summary["total"]["expected_match"] += len(expecting_lines) + analyzer_summary["match_files"][key] = {"match": len(matching_lines), "expected_match": len(expecting_lines)} + analyzer_summary["match_messages"][key] = matching_lines + analyzer_summary["expect_messages"][key] = expecting_lines + expected_lines_total.extend(expecting_lines) + + # Find unused regex matches + for regex in self.expect_regex: + for line in expected_lines_total: + if re.search(regex, line): + break + else: + unused_regex_messages.append(regex) + analyzer_summary["total"]["expected_missing_match"] = len(unused_regex_messages) + analyzer_summary["unused_expected_regexp"] = unused_regex_messages + + if fail: + self._verify_log(analyzer_summary) + else: + return analyzer_summary + + def save_extracted_log(self, dest): + """ + @summary: Download extracted syslog log file to the ansible host. + + @param dest: File path to store downloaded log file. 
+ """ + self.ansible_host.fetch(dest=dest, src=self.extracted_syslog, flat="yes") diff --git a/tests/loganalyzer/loganalyzer_common_expect.txt b/tests/loganalyzer/loganalyzer_common_expect.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/loganalyzer/loganalyzer_common_ignore.txt b/tests/loganalyzer/loganalyzer_common_ignore.txt new file mode 100644 index 00000000000..9f732b760a7 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_ignore.txt @@ -0,0 +1,2 @@ +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" +r, ".* ERR snmp.*ERROR: MIBUpdater.*" diff --git a/tests/loganalyzer/loganalyzer_common_match.txt b/tests/loganalyzer/loganalyzer_common_match.txt new file mode 100644 index 00000000000..23c2870e4a9 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_match.txt @@ -0,0 +1,6 @@ +r, "\.ERR", "\.WARN", "crash" +r, "kernel:.*Oops", "kernel:.*hung", "kernel.*oom\s" +r, "kernel:.*scheduling", "kernel:.*atomic", "kernel:.*panic" +r, "kernel:.*\serr", "kernel:.*allocation", "kernel:.*kill" +r, "kernel:.*kmemleak.*", "kernel:.* Err:" +s, "ERR" diff --git a/tests/loganalyzer/system_msg_handler.py b/tests/loganalyzer/system_msg_handler.py new file mode 100644 index 00000000000..0d963610681 --- /dev/null +++ b/tests/loganalyzer/system_msg_handler.py @@ -0,0 +1,665 @@ +''' +Owner: Hrachya Mughnetsyan + +Created on: 11/11/2016 + +Description: This file contains the log analyzer functionality in order + to verify no failures are detected in the system logs while + it can be that traffic/functionality works. 
+ + Design is available in https://github.com/Azure/SONiC/wiki/LogAnalyzer + +Usage: Examples of how to use log analyzer + sudo python loganalyzer.py --out_dir /home/hrachya/projects/loganalyzer/log.analyzer.results --action analyze --run_id myTest114 --logs file3.log -m /home/hrachya/projects/loganalyzer/match.file.1.log,/home/hrachya/projects/loganalyzer/match.file.2.log -i ignore.file.1.log,ignore.file.2.log -v +''' + +#--------------------------------------------------------------------- +# Global imports +#--------------------------------------------------------------------- +import sys +import getopt +import re +import csv +import pprint +import logging +import logging.handlers +from __builtin__ import True + +#--------------------------------------------------------------------- +# Global variables +#--------------------------------------------------------------------- +tokenizer = ',' +comment_key = '#' +system_log_file = '/var/log/syslog' + +#-- List of ERROR codes to be returned by AnsibleLogAnalyzer +err_duplicate_start_marker = -1 +err_duplicate_end_marker = -2 +err_no_end_marker = -3 +err_no_start_marker = -4 +err_invalid_string_format = -5 +err_invalid_input = -6 + +class AnsibleLogAnalyzer: + ''' + @summary: Overview of functionality + + This class performs analysis of the log files, searching for concerning messages. + The definition of concerning messages is passed to analyze_file_list() method, + as a list of regular expressions. + Additionally there will be a list of regular expressions which we wish to ignore. + Any line in log file which will match to the set of matching regex expressions + AND will not match set of 'ignore' regex expressions, will be considered a + 'match' and will be reported. + + AnsibleLogAnalyzer will be called initially before any test has ran, and will be + instructed to place 'start' marker into all log files to be analyzed. 
+ When tests have ran, AnsibleLogAnalyzer will be instructed to place end-marker + into the log files. After this, AnsibleLogAnalyzer will be invoked to perform the + analysis of logs. The analysis will be performed on specified log files. + For each log file only the content between start/end markers will be analyzed. + + For details see comments on analyze_file_list method. + ''' + + ''' + Prefixes used to build start and end markers. + The prefixes will be combined with a unique string, called run_id, passed by + the caller, to produce start/end markers for given analysis run. + ''' + + start_marker_prefix = "start-LogAnalyzer" + end_marker_prefix = "end-LogAnalyzer" + + def init_sys_logger(self): + logger = logging.getLogger('LogAnalyzer') + logger.setLevel(logging.DEBUG) + handler = logging.handlers.SysLogHandler(address = '/dev/log') + logger.addHandler(handler) + return logger + #--------------------------------------------------------------------- + + def __init__(self, run_id, verbose): + self.run_id = run_id + self.verbose = verbose + #--------------------------------------------------------------------- + + def print_diagnostic_message(self, message): + if (not self.verbose): + return + + print '[LogAnalyzer][diagnostic]:%s' % message + #--------------------------------------------------------------------- + + def create_start_marker(self): + return self.start_marker_prefix + "-" + self.run_id + + #--------------------------------------------------------------------- + + def is_filename_stdin(self, file_name): + return file_name == "-" + + #--------------------------------------------------------------------- + + def create_end_marker(self): + return self.end_marker_prefix + "-" + self.run_id + #--------------------------------------------------------------------- + + def place_marker_to_file(self, log_file, marker): + ''' + @summary: Place marker into each log file specified. + @param log_file : File path, to be applied with marker. 
+ @param marker: Marker to be placed into log files. + ''' + if not len(log_file) or self.is_filename_stdin(log_file): + self.print_diagnostic_message('Log file {} not found. Skip adding marker.'.format(log_file)) + self.print_diagnostic_message('log file:{}, place marker {}'.format(log_file, marker)) + with open(log_file, 'a') as file: + file.write(marker) + file.write('\n') + file.flush() + + def place_marker_to_syslog(self, marker): + ''' + @summary: Place marker into '/dev/log'. + @param marker: Marker to be placed into syslog. + ''' + + syslogger = self.init_sys_logger() + syslogger.info(marker) + syslogger.info('\n') + + def place_marker(self, log_file_list, marker): + ''' + @summary: Place marker into '/dev/log' and each log file specified. + @param log_file_list : List of file paths, to be applied with marker. + @param marker: Marker to be placed into log files. + ''' + + for log_file in log_file_list: + self.place_marker_to_file(log_file, marker) + + self.place_marker_to_syslog(marker) + + return + #--------------------------------------------------------------------- + + def error_to_regx(self, error_string): + ''' + This method converts a (list of) strings to one regular expression. 
+ + @summary: Meta characters are escaped by inserting a '\' beforehand + Digits are replaced with the arbitrary '\d+' code + A list is converted into an alteration statement (|) + + @param error_string: the string(s) to be converted into a regular expression + + @return: A SINGLE regular expression string + ''' + + #-- Check if error_string is a string or a list --# + if (isinstance(error_string, basestring)): + original_string = error_string + #-- Escapes out of all the meta characters --# + error_string = re.escape(error_string) + #-- Replaces a white space with the white space regular expression + error_string = re.sub(r"(\\\s+)+", "\\\\s+", error_string) + #-- Replaces a digit number with the digit regular expression + error_string = re.sub(r"\b\d+\b", "\\\\d+", error_string) + #-- Replaces a hex number with the hex regular expression + error_string = re.sub(r"0x[0-9a-fA-F]+", "0x[0-9a-fA-F]+", error_string) + self.print_diagnostic_message('Built error string: %s' % error_string) + + #-- If given a list, concatenate into one regx --# + else: + error_string = '|'.join(map(self.error_to_regx, error_string)) + + return error_string + #--------------------------------------------------------------------- + + def create_msg_regex(self, file_lsit): + ''' + @summary: This method reads input file containing list of regular expressions + to be matched against. + + @param file_list : List of file paths, contains search expressions. + + @return: A regex class instance, corresponding to loaded regex expressions. + Will be used for matching operations by callers. 
+ ''' + messages_regex = [] + + if file_lsit is None or (0 == len(file_lsit)): + return None + + for filename in file_lsit: + self.print_diagnostic_message('processing match file:%s' % filename) + with open(filename, 'rb') as csvfile: + csvreader = csv.reader(csvfile, quotechar='"', delimiter=',', + skipinitialspace=True) + + for index, row in enumerate(csvreader): + row = [item for item in row if item != ""] + self.print_diagnostic_message('[diagnostic]:processing row:%d' % index) + self.print_diagnostic_message('row:%s'% row) + try: + #-- Ignore Empty Lines + if not row: + continue + #-- Ignore commented Lines + if row[0].startswith(comment_key): + self.print_diagnostic_message('[diagnostic]:skipping row[0]:%s' % row[0]) + continue + + #-- ('s' | 'r') = (Raw String | Regular Expression) + is_regex = row[0] + if ('s' == row[0]): + is_regex = False + elif ('r' == row[0]): + is_regex = True + else: + raise Exception('file:%s, malformed line:%d. ' + 'must be \'s\'(string) or \'r\'(regex)' + %(filename,index)) + + if (is_regex): + messages_regex.extend(row[1:]) + else: + messages_regex.append(self.error_to_regx(row[1:])) + + except Exception as e: + print 'ERROR: line %d is formatted incorrectly in file %s. Skipping line' % (index, filename) + print repr(e) + sys.exit(err_invalid_string_format) + + if (len(messages_regex)): + regex = re.compile('|'.join(messages_regex)) + else: + regex = None + return regex, messages_regex + #--------------------------------------------------------------------- + + def line_matches(self, str, match_messages_regex, ignore_messages_regex): + ''' + @summary: This method checks whether given string matches against the + set of regular expressions. + + @param str: string to match against 'match' and 'ignore' regex expressions. + A string which matched to the 'match' set will be reported. 
+ A string which matches to 'match' set, but also matches to + 'ignore' set - will not be reported (will be ignored) + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @return: True is str matches regex criteria, otherwise False. + ''' + + ret_code = False + + if ((match_messages_regex is not None) and (match_messages_regex.findall(str))): + if (ignore_messages_regex is None): + ret_code = True + + elif (not ignore_messages_regex.findall(str)): + self.print_diagnostic_message('matching line: %s' % str) + ret_code = True + + return ret_code + #--------------------------------------------------------------------- + + def line_is_expected(self, str, expect_messages_regex): + ''' + @summary: This method checks whether given string matches against the + set of "expected" regular expressions. + ''' + + ret_code = False + if (expect_messages_regex is not None) and (expect_messages_regex.findall(str)): + ret_code = True + + return ret_code + + def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_regex, expect_messages_regex): + ''' + @summary: Analyze input file content for messages matching input regex + expressions. See line_matches() for details on matching criteria. + + @param log_file_path: Patch to the log file. + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @param expect_messages_regex: + regex class instance containing messages that are expected to appear in logfile. + + @param end_marker_regex - end marker + + @return: List of strings match search criteria. + ''' + + + self.print_diagnostic_message('analyzing file: %s'% log_file_path) + + #-- indicates whether log analyzer currently is in the log range between start + #-- and end marker. 
see analyze_file method. + in_analysis_range = False + stdin_as_input = self.is_filename_stdin(log_file_path) + matching_lines = [] + expected_lines = [] + found_start_marker = False + found_end_marker = False + if stdin_as_input: + log_file = sys.stdin + else: + log_file = open(log_file_path, 'r') + + start_marker = self.create_start_marker() + end_marker = self.create_end_marker() + + for rev_line in reversed(log_file.readlines()): + if stdin_as_input: + in_analysis_range = True + else: + if rev_line.find(end_marker) != -1: + self.print_diagnostic_message('found end marker: %s' % end_marker) + if (found_end_marker): + print 'ERROR: duplicate end marker found' + sys.exit(err_duplicate_end_marker) + found_end_marker = True + in_analysis_range = True + continue + + if not stdin_as_input: + if rev_line.find(start_marker) != -1 and 'nsible' not in rev_line: + self.print_diagnostic_message('found start marker: %s' % start_marker) + if (found_start_marker): + print 'ERROR: duplicate start marker found' + sys.exit(err_duplicate_start_marker) + found_start_marker = True + + if(not in_analysis_range): + print 'ERROR: found start marker:%s without corresponding end marker' % rev_line + sys.exit(err_no_end_marker) + in_analysis_range = False + break + + if in_analysis_range : + if self.line_is_expected(rev_line, expect_messages_regex): + expected_lines.append(rev_line) + + elif self.line_matches(rev_line, match_messages_regex, ignore_messages_regex): + matching_lines.append(rev_line) + + # care about the markers only if input is not stdin + if not stdin_as_input: + if (not found_start_marker): + print 'ERROR: start marker was not found' + sys.exit(err_no_start_marker) + + if (not found_end_marker): + print 'ERROR: end marker was not found' + sys.exit(err_no_end_marker) + + return matching_lines, expected_lines + #--------------------------------------------------------------------- + + def analyze_file_list(self, log_file_list, match_messages_regex, ignore_messages_regex, 
expect_messages_regex): + ''' + @summary: Analyze input files messages matching input regex expressions. + See line_matches() for details on matching criteria. + + @param log_file_list: List of paths to the log files. + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @param expect_messages_regex: + regex class instance containing messages that are expected to appear in logfile. + + @return: Returns map + ''' + res = {} + + for log_file in log_file_list: + if not len(log_file): + continue + match_strings, expect_strings = self.analyze_file(log_file, match_messages_regex, ignore_messages_regex, expect_messages_regex) + + match_strings.reverse() + expect_strings.reverse() + res[log_file] = [ match_strings, expect_strings ] + + return res + #--------------------------------------------------------------------- + +def usage(): + print 'loganalyzer input parameters:' + print '--help Print usage' + print '--verbose Print verbose output during the run' + print '--action init|analyze - action to perform.' + print ' init - initialize analysis by placing start-marker' + print ' to all log files specified in --logs parameter.' + print ' analyze - perform log analysis of files specified in --logs parameter.' + print ' add_end_marker - add end marker to all log files specified in --logs parameter.' + print '--out_dir path Directory path where to place output files, ' + print ' must be present when --action == analyze' + print '--logs path{,path} List of full paths to log files to be analyzed.' + print ' Implicitly system log file will be also processed' + print '--run_id string String passed to loganalyzer, uniquely identifying ' + print ' analysis session. Used to construct start/end markers. ' + print '--match_files_in path{,path} List of paths to files containing strings. 
A string from log file' + print ' By default syslog will be always analyzed and should be passed by match_files_in.' + print ' matching any string from match_files_in will be collected and ' + print ' reported. Must be present when action == analyze' + print '--ignore_files_in path{,path} List of paths to files containing string. ' + print ' A string from log file matching any string from these' + print ' files will be ignored during analysis. Must be present' + print ' when action == analyze.' + print '--expect_files_in path{,path} List of path to files containing string. ' + print ' All the strings from these files will be expected to present' + print ' in one of specified log files during the analysis. Must be present' + print ' when action == analyze.' + +#--------------------------------------------------------------------- + +def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, expect_files_in): + ''' + @summary: This function validates command line parameter 'action' and + other related parameters. 
+ + @return: True if input is correct + ''' + + ret_code = True + + if (action == 'init'): + ret_code = True + elif (action == 'add_end_marker'): + ret_code = True + elif (action == 'analyze'): + if out_dir is None or len(out_dir) == 0: + print 'ERROR: missing required out_dir for analyze action' + ret_code = False + + elif match_files_in is None or len(match_files_in) == 0: + print 'ERROR: missing required match_files_in for analyze action' + ret_code = False + + + else: + ret_code = False + print 'ERROR: invalid action:%s specified' % action + + return ret_code +#--------------------------------------------------------------------- + +def check_run_id(run_id): + ''' + @summary: Validate command line parameter 'run_id' + + @param run_id: Unique string identifying current run + + @return: True if input is correct + ''' + + ret_code = True + + if ((run_id is None) or (len(run_id) == 0)): + print 'ERROR: no run_id specified' + ret_code = False + + return ret_code +#--------------------------------------------------------------------- + +def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_e, unused_regex_messages): + ''' + @summary: Write results of analysis into a file. + + @param run_id: Uinique string identifying current run + + @param out_dir: Full path to output directory where to place the result file. + + @param analysis_result_per_file: map file_name: [list of found matching strings] + + @return: void + ''' + + match_cnt = 0 + expected_cnt = 0 + expected_lines_total = [] + + with open(out_dir + "/result.loganalysis." 
+ run_id + ".log", 'w') as out_file: + for key, val in analysis_result_per_file.iteritems(): + matching_lines, expected_lines = val + + out_file.write("\n-----------Matches found in file:'%s'-----------\n" % key) + for s in matching_lines: + out_file.write(s) + out_file.write('\nMatches:%d\n' % len(matching_lines)) + match_cnt += len(matching_lines) + + out_file.write("\n-------------------------------------------------\n\n") + + for i in expected_lines: + out_file.write(i) + expected_lines_total.append(i) + out_file.write('\nExpected and found matches:%d\n' % len(expected_lines)) + expected_cnt += len(expected_lines) + + out_file.write("\n-------------------------------------------------\n\n") + out_file.write('Total matches:%d\n' % match_cnt) + # Find unused regex matches + for regex in messages_regex_e: + for line in expected_lines_total: + if re.search(regex, line): + break + else: + unused_regex_messages.append(regex) + + out_file.write('Total expected and found matches:%d\n' % expected_cnt) + out_file.write('Total expected but not found matches: %d\n\n' % len(unused_regex_messages)) + for regex in unused_regex_messages: + out_file.write(regex + "\n") + + out_file.write("\n-------------------------------------------------\n\n") + out_file.flush() +#--------------------------------------------------------------------- + +def write_summary_file(run_id, out_dir, analysis_result_per_file, unused_regex_messages): + ''' + @summary: This function writes results summary into a file + + @param run_id: Unique string identifying current run + + @param out_dir: Output directory full path. + + @param analysis_result_per_file: map file_name:[list of matching strings] + + @return: void + ''' + + out_file = open(out_dir + "/summary.loganalysis." 
+ run_id + ".log", 'w') + out_file.write("\nLOG ANALYSIS SUMMARY\n") + total_match_cnt = 0 + total_expect_cnt = 0 + for key, val in analysis_result_per_file.iteritems(): + matching_lines, expecting_lines = val + + file_match_cnt = len(matching_lines) + file_expect_cnt = len(expecting_lines) + out_file.write("FILE: %s MATCHES %d\n" % (key, file_match_cnt)) + out_file.write("FILE: %s EXPECTED MATCHES %d\n" % (key, file_expect_cnt)) + out_file.flush() + total_match_cnt += file_match_cnt + total_expect_cnt += file_expect_cnt + + out_file.write("-----------------------------------\n") + out_file.write("TOTAL MATCHES: %d\n" % total_match_cnt) + out_file.write("TOTAL EXPECTED MATCHES: %d\n" % total_expect_cnt) + out_file.write("TOTAL EXPECTED MISSING MATCHES: %d\n" % len(unused_regex_messages)) + out_file.write("-----------------------------------\n") + out_file.flush() + out_file.close() +#--------------------------------------------------------------------- + +def main(argv): + + action = None + run_id = None + log_files_in = "" + out_dir = None + match_files_in = None + ignore_files_in = None + expect_files_in = None + verbose = False + + try: + opts, args = getopt.getopt(argv, "a:r:l:o:m:i:e:vh", ["action=", "run_id=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"]) + + except getopt.GetoptError: + print "Invalid option specified" + usage() + sys.exit(err_invalid_input) + + for opt, arg in opts: + if (opt in ("-h", "--help")): + usage() + sys.exit(err_invalid_input) + + if (opt in ("-a", "--action")): + action = arg + + elif (opt in ("-r", "--run_id")): + run_id = arg + + elif (opt in ("-l", "--logs")): + log_files_in = arg + + elif (opt in ("-o", "--out_dir")): + out_dir = arg + + elif (opt in ("-m", "--match_files_in")): + match_files_in = arg + + elif (opt in ("-i", "--ignore_files_in")): + ignore_files_in = arg + + elif (opt in ("-e", "--expect_files_in")): + expect_files_in = arg + + elif (opt in ("-v", 
"--verbose")): + verbose = True + + if not (check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, expect_files_in) and check_run_id(run_id)): + usage() + sys.exit(err_invalid_input) + + analyzer = AnsibleLogAnalyzer(run_id, verbose) + + log_file_list = filter(None, log_files_in.split(tokenizer)) + + result = {} + if (action == "init"): + analyzer.place_marker(log_file_list, analyzer.create_start_marker()) + return 0 + elif (action == "analyze"): + match_file_list = match_files_in.split(tokenizer) + ignore_file_list = ignore_files_in.split(tokenizer) + expect_file_list = expect_files_in.split(tokenizer) + + analyzer.place_marker(log_file_list, analyzer.create_end_marker()) + + match_messages_regex, messages_regex_m = analyzer.create_msg_regex(match_file_list) + ignore_messages_regex, messages_regex_i = analyzer.create_msg_regex(ignore_file_list) + expect_messages_regex, messages_regex_e = analyzer.create_msg_regex(expect_file_list) + + # if no log file specified - add system log + if not log_file_list: + log_file_list.append(system_log_file) + + result = analyzer.analyze_file_list(log_file_list, match_messages_regex, + ignore_messages_regex, expect_messages_regex) + unused_regex_messages = [] + write_result_file(run_id, out_dir, result, messages_regex_e, unused_regex_messages) + write_summary_file(run_id, out_dir, result, unused_regex_messages) + elif (action == "add_end_marker"): + analyzer.place_marker(log_file_list, analyzer.create_end_marker()) + return 0 + + else: + print 'Unknown action:%s specified' % action + return len(result) +#--------------------------------------------------------------------- + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/tests/platform/check_all_interface_info.py b/tests/platform/check_all_interface_info.py new file mode 100644 index 00000000000..ba7b5b78e8c --- /dev/null +++ b/tests/platform/check_all_interface_info.py @@ -0,0 +1,19 @@ +""" +Helper script for checking all related information of 
interfaces + +This script contains re-usable functions for checking status of interfaces on SONiC. +""" +import logging +from check_transceiver_status import all_transceivers_detected +from check_interface_status import check_interface_status + + +def check_interface_information(dut, interfaces): + if not all_transceivers_detected(dut, interfaces): + logging.info("Not all transceivers are detected") + return False + if not check_interface_status(dut, interfaces): + logging.info("Not all interfaces are up") + return False + + return True diff --git a/tests/platform/check_daemon_status.py b/tests/platform/check_daemon_status.py new file mode 100644 index 00000000000..ea9fd6d6f64 --- /dev/null +++ b/tests/platform/check_daemon_status.py @@ -0,0 +1,32 @@ +""" +Helper script for checking status of platform daemon status + +This script contains re-usable functions for checking status of platform daemon status. +""" +import logging + + +def check_pmon_daemon_status(dut): + """ + @summary: check daemon running status inside pmon docker. + + This function use command "supervisorctl status" inside the container and check the status from the command output. + If the daemon status is "RUNNING" then return True, if daemon not exist or status is not "RUNNING", return false. 
+ """ + daemon_list = dut.get_pmon_daemon_list() + daemon_status = {} + try: + for daemon in daemon_list: + output = dut.shell('docker exec pmon supervisorctl status | grep %s' % daemon, module_ignore_errors=True) + if bool(output["stdout_lines"]): + expected_line = output["stdout_lines"][0] + expected_line_list = expected_line.split() + daemon_status[daemon] = (daemon in expected_line_list and 'RUNNING' in expected_line_list) + logging.debug("Daemon %s status is %s" % (daemon, str(daemon_status[daemon]))) + else: + logging.debug("Daemon %s does not exist" % daemon) + return False + return all(daemon_status.values()) + except Exception as e: + logging.error("Failed to get platform daemon status, exception: %s" % repr(e)) + return False diff --git a/tests/platform/check_interface_status.py b/tests/platform/check_interface_status.py index a2aa4a4c578..0de7b1691e4 100644 --- a/tests/platform/check_interface_status.py +++ b/tests/platform/check_interface_status.py @@ -35,23 +35,39 @@ def check_interface_status(dut, interfaces): """ @summary: Check the admin and oper status of the specified interfaces on DUT. @param dut: The AnsibleHost object of DUT. For interacting with DUT. - @param hostname: @param interfaces: List of interfaces that need to be checked. 
""" logging.info("Check interface status using cmd 'intfutil'") - mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"] + mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"] output = dut.command("intfutil description") intf_status = parse_intf_status(output["stdout_lines"][2:]) + check_intf_presence_command = 'show interface transceiver presence {}' for intf in interfaces: expected_oper = "up" if intf in mg_ports else "down" expected_admin = "up" if intf in mg_ports else "down" - assert intf in intf_status, "Missing status for interface %s" % intf - assert intf_status[intf]["oper"] == expected_oper, \ - "Oper status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["oper"], expected_oper) - assert intf_status[intf]["admin"] == expected_oper, \ - "Admin status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["admin"], expected_admin) + if intf not in intf_status: + logging.info("Missing status for interface %s" % intf) + return False + if intf_status[intf]["oper"] != expected_oper: + logging.info("Oper status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["oper"], + expected_oper)) + return False + if intf_status[intf]["admin"] != expected_admin: + logging.info("Admin status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["admin"], + expected_admin)) + return False + + # Cross check the interface SFP presence status + check_presence_output = dut.command(check_intf_presence_command.format(intf)) + presence_list = check_presence_output["stdout_lines"][2].split() + assert intf in presence_list, "Wrong interface name in the output: %s" % str(presence_list) + assert 'Present' in presence_list, "Status is not expected, presence status: %s" % str(presence_list) logging.info("Check interface status using the interface_facts module") intf_facts = dut.interface_facts(up_ports=mg_ports)["ansible_facts"] down_ports = 
intf_facts["ansible_interface_link_down_ports"] - assert len(down_ports) == 0, "Some interfaces are down: %s" % str(down_ports) + if len(down_ports) != 0: + logging.info("Some interfaces are down: %s" % str(down_ports)) + return False + + return True diff --git a/tests/platform/conftest.py b/tests/platform/conftest.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/platform/mellanox/check_sysfs.py b/tests/platform/mellanox/check_sysfs.py index 364e977dfa5..72e9f365d43 100644 --- a/tests/platform/mellanox/check_sysfs.py +++ b/tests/platform/mellanox/check_sysfs.py @@ -22,14 +22,15 @@ def check_sysfs_broken_symbolinks(dut): assert len(broken_symbolinks) == 0, \ "Found some broken symbolinks: %s" % str(broken_symbolinks) + def check_sysfs_thermal(dut): logging.info("Check thermal") file_asic = dut.command("cat /bsp/thermal/asic") try: asic_temp = float(file_asic["stdout"]) / 1000 - assert asic_temp > 0 and asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] - except: - assert False, "Bad content in /bsp/thermal/asic: %s" % file_asic["stdout"] + assert 0 < asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] + except Exception as e: + assert False, "Bad content in /bsp/thermal/asic: %s, exception: %s" % (file_asic["stdout"], repr(e)) def check_sysfs_fan(dut): @@ -38,14 +39,20 @@ def check_sysfs_fan(dut): from common.mellanox_data import SWITCH_MODELS fan_count = SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["number"] - if SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["hot_swappable"]: - fan_status_list = ["/bsp/module/fan%d_status" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_status in fan_status_list: - fan_status_content = dut.command("cat %s" % fan_status) - assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status - - fan_min_list = ["/bsp/fan/fan%d_min" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_min in fan_min_list: + fan_speed = 0 + fan_min_speed = 0 + 
fan_max_speed = 0 + fan_set_speed = 0 + for fan_id in range(1, fan_count + 1): + if SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["hot_swappable"]: + fan_status = "/bsp/module/fan{}_status".format(fan_id) + try: + fan_status_content = dut.command("cat %s" % fan_status) + assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_status, repr(e)) + + fan_min = "/bsp/fan/fan{}_min".format(fan_id) try: fan_min_content = dut.command("cat %s" % fan_min) fan_min_speed = int(fan_min_content["stdout"]) @@ -53,8 +60,7 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_min, repr(e)) - fan_max_list = ["/bsp/fan/fan%d_max" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_max in fan_max_list: + fan_max = "/bsp/fan/fan{}_max".format(fan_id) try: fan_max_content = dut.command("cat %s" % fan_max) fan_max_speed = int(fan_max_content["stdout"]) @@ -62,8 +68,7 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_max, repr(e)) - fan_speed_get_list = ["/bsp/fan/fan%d_speed_get" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_speed_get in fan_speed_get_list: + fan_speed_get = "/bsp/fan/fan{}_speed_get".format(fan_id) try: fan_speed_get_content = dut.command("cat %s" % fan_speed_get) fan_speed = int(fan_speed_get_content["stdout"]) @@ -71,10 +76,90 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_speed_get, repr(e)) - fan_speed_set_list = ["/bsp/fan/fan%d_speed_set" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_speed_set in fan_speed_set_list: - fan_speed_set_content = dut.command("cat %s" % fan_speed_set) - assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" + assert fan_min_speed < fan_speed < fan_max_speed, \ + 
"Fan speed out of range: min speed: %d, speed: %d, max speed: %d" \ + % (fan_min_speed, fan_speed, fan_max_speed) + + fan_speed_set = "/bsp/fan/fan{}_speed_set".format(fan_id) + try: + fan_speed_set_content = dut.command("cat %s" % fan_speed_set) + assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" + fan_set_speed = int(fan_speed_set_content["stdout"]) + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_speed_set, repr(e)) + + max_tolerance_speed = ((float(fan_set_speed) / 256) * fan_max_speed) * (1 + 0.3) + min_tolerance_speed = ((float(fan_set_speed) / 256) * fan_max_speed) * (1 - 0.3) + assert min_tolerance_speed < fan_speed < max_tolerance_speed, "Speed out of tolerance speed range (%d, %d)" \ + % (min_tolerance_speed, max_tolerance_speed) + + +def check_sysfs_cpu(dut): + logging.info("Check cpu") + from common.mellanox_data import SWITCH_MODELS + cpu_pack_count = SWITCH_MODELS[dut.facts["hwsku"]]["cpu_pack"]["number"] + if cpu_pack_count != 0: + cpu_pack_temp_file = "/bsp/thermal/cpu_pack" + cpu_pack_temp_file_output = dut.command("cat %s" % cpu_pack_temp_file) + cpu_pack_temp = float(cpu_pack_temp_file_output["stdout"])/1000 + + cpu_pack_max_temp_file = "/bsp/thermal/cpu_pack_max" + cpu_pack_max_temp_file_output = dut.command("cat %s" % cpu_pack_max_temp_file) + cpu_pack_max_temp = float(cpu_pack_max_temp_file_output["stdout"])/1000 + + cpu_pack_crit_temp_file = "/bsp/thermal/cpu_pack_crit" + cpu_pack_crit_temp_file_output = dut.command("cat %s" % cpu_pack_crit_temp_file) + cpu_pack_crit_temp = float(cpu_pack_crit_temp_file_output["stdout"])/1000 + + assert cpu_pack_max_temp <= cpu_pack_crit_temp, "Bad CPU pack max temp or critical temp, %s, %s " \ + % (str(cpu_pack_max_temp), str(cpu_pack_crit_temp)) + assert cpu_pack_temp < cpu_pack_max_temp, "CPU pack overheated, temp: %s" % (str(cpu_pack_temp)) + + cpu_core_count = SWITCH_MODELS[dut.facts["hwsku"]]["cpu_cores"]["number"] + for 
core_id in range(0, cpu_core_count): + cpu_core_temp_file = "/bsp/thermal/cpu_core{}".format(core_id) + cpu_core_temp_file_output = dut.command("cat %s" % cpu_core_temp_file) + cpu_core_temp = float(cpu_core_temp_file_output["stdout"])/1000 + + cpu_core_max_temp_file = "/bsp/thermal/cpu_core{}_max".format(core_id) + cpu_core_max_temp_file_output = dut.command("cat %s" % cpu_core_max_temp_file) + cpu_core_max_temp = float(cpu_core_max_temp_file_output["stdout"])/1000 + + cpu_core_crit_temp_file = "/bsp/thermal/cpu_core{}_crit".format(core_id) + cpu_core_crit_temp_file_output = dut.command("cat %s" % cpu_core_crit_temp_file) + cpu_core_crit_temp = float(cpu_core_crit_temp_file_output["stdout"])/1000 + + assert cpu_core_max_temp <= cpu_core_crit_temp, "Bad CPU core%d max temp or critical temp, %s, %s " \ + % (core_id, str(cpu_core_max_temp), str(cpu_core_crit_temp)) + assert cpu_core_temp < cpu_core_max_temp, "CPU core%d overheated, temp: %s" % (core_id, str(cpu_core_temp)) + + +def check_psu_status_sysfs_consistency(dut, psu_id, psu_state): + """ + @summary: Check psu related sysfs under /bsp/module against psu_state + """ + psu_exist = "/bsp/module/psu%s_status" % psu_id + if psu_state == "NOT PRESENT": + psu_exist_content = dut.command("cat %s" % psu_exist) + logging.info("PSU state %s file %s read %s" % (psu_state, psu_exist, psu_exist_content["stdout"])) + assert psu_exist_content["stdout"] == "0", "CLI returns NOT PRESENT while %s contains %s" % \ + (psu_exist, psu_exist_content["stdout"]) + else: + from common.mellanox_data import SWITCH_MODELS + dut_hwsku = dut.facts["hwsku"] + hot_swappabe = SWITCH_MODELS[dut_hwsku]["psus"]["hot_swappable"] + if hot_swappabe: + psu_exist_content = dut.command("cat %s" % psu_exist) + logging.info("PSU state %s file %s read %s" % (psu_state, psu_exist, psu_exist_content["stdout"])) + assert psu_exist_content["stdout"] == "1", "CLI returns %s while %s contains %s" % \ + (psu_state, psu_exist, psu_exist_content["stdout"]) + + 
psu_pwr_state = "/bsp/module/psu%s_pwr_status" % psu_id + psu_pwr_state_content = dut.command("cat %s" % psu_pwr_state) + logging.info("PSU state %s file %s read %s" % (psu_state, psu_pwr_state, psu_pwr_state_content["stdout"])) + assert (psu_pwr_state_content["stdout"] == "1" and psu_state == "OK") \ + or (psu_pwr_state_content["stdout"] == "0" and psu_state == "NOT OK"),\ + "sysfs content %s mismatches with psu_state %s" % (psu_pwr_state_content["stdout"], psu_state) def check_sysfs_psu(dut): @@ -83,11 +168,8 @@ def check_sysfs_psu(dut): from common.mellanox_data import SWITCH_MODELS psu_count = SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["number"] - if SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["hot_swappable"]: - psu_status_list = ["/bsp/module/psu%d_status" % psu_id for psu_id in range(1, psu_count + 1)] - for psu_status in psu_status_list: - psu_status_content = dut.command("cat %s" % psu_status) - assert psu_status_content["stdout"] == "1", "Content of %s is not 1" % psu_status + for psu_id in range(1, psu_count + 1): + check_psu_status_sysfs_consistency(dut, psu_id, 'OK') def check_sysfs_qsfp(dut, interfaces): @@ -112,6 +194,8 @@ def check_sysfs(dut, interfaces): check_sysfs_fan(dut) + check_sysfs_cpu(dut) + check_sysfs_psu(dut) check_sysfs_qsfp(dut, interfaces) diff --git a/tests/platform/mellanox/conftest.py b/tests/platform/mellanox/conftest.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/platform/mellanox/test_check_sfp_presence.py b/tests/platform/mellanox/test_check_sfp_presence.py new file mode 100644 index 00000000000..5ed2583a1f4 --- /dev/null +++ b/tests/platform/mellanox/test_check_sfp_presence.py @@ -0,0 +1,35 @@ +""" +Cross check show sfp presence with qsfp_status +""" +import logging +import os +import json + +from platform_fixtures import conn_graph_facts + + +def test_check_sfp_presence(testbed_devices, conn_graph_facts): + """This test case is to check SFP presence status with CLI and sysfs. 
+ """ + ans_host = testbed_devices["dut"] + ports_config = json.loads(ans_host.command("sudo sonic-cfggen -d --var-json PORT")["stdout"]) + check_qsfp_sysfs_command = 'cat /bsp/qsfp/qsfp{}_status' + check_intf_presence_command = 'show interface transceiver presence {}' + + logging.info("Use show interface status information") + for intf in conn_graph_facts["device_conn"]: + intf_lanes = ports_config[intf]["lanes"] + sfp_id = int(intf_lanes.split(",")[0])/4 + 1 + + check_presence_output = ans_host.command(check_intf_presence_command.format(intf)) + assert check_presence_output["rc"] == 0, "Failed to read interface %s transceiver presence" % intf + logging.info(str(check_presence_output["stdout_lines"][2])) + presence_list = check_presence_output["stdout_lines"][2].split() + logging.info(str(presence_list)) + assert intf in presence_list, "Wrong interface name in the output %s" % str(presence_list) + assert 'Present' in presence_list, "Status is not expected, output %s" % str(presence_list) + + check_sysfs_output = ans_host.command(check_qsfp_sysfs_command.format(str(sfp_id))) + logging.info('output of check sysfs %s' % (str(check_sysfs_output))) + assert check_sysfs_output["rc"] == 0, "Failed to read qsfp_status of sfp%s." 
% str(sfp_id) + assert check_sysfs_output["stdout"] == '1', "Content of qsfp_status of sfp%s is not correct" % str(sfp_id) diff --git a/tests/platform/mellanox/test_check_sysfs.py b/tests/platform/mellanox/test_check_sysfs.py index 973d653f6c6..fb6dd6b6bbe 100644 --- a/tests/platform/mellanox/test_check_sysfs.py +++ b/tests/platform/mellanox/test_check_sysfs.py @@ -5,6 +5,7 @@ https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md """ import logging +import os try: from platform_fixtures import conn_graph_facts diff --git a/tests/platform/platform_fixtures.py b/tests/platform/platform_fixtures.py index 0b73940db3f..8be133d60db 100644 --- a/tests/platform/platform_fixtures.py +++ b/tests/platform/platform_fixtures.py @@ -1,4 +1,5 @@ import pytest +import os @pytest.fixture(scope="module") def conn_graph_facts(testbed_devices): diff --git a/tests/platform/test_platform_info.py b/tests/platform/test_platform_info.py index 67eb65676a5..93c30ffd674 100644 --- a/tests/platform/test_platform_info.py +++ b/tests/platform/test_platform_info.py @@ -7,6 +7,8 @@ import logging import re import time +import os +import sys import pytest @@ -37,6 +39,24 @@ def test_show_platform_summary(testbed_devices): "Unexpected output fields, actual=%s, expected=%s" % (str(actual_fields), str(expected_fields)) +def check_vendor_specific_psustatus(dut, psu_status_line): + """ + @summary: Vendor specific psu status check + """ + if dut.facts["asic_type"] in ["mellanox"]: + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + sub_folder_dir = os.path.join(current_file_dir, "mellanox") + if sub_folder_dir not in sys.path: + sys.path.append(sub_folder_dir) + from check_sysfs import check_psu_status_sysfs_consistency + + psu_line_pattern = re.compile(r"PSU\s+(\d)+\s+(OK|NOT OK|NOT PRESENT)") + psu_match = psu_line_pattern.match(psu_status_line) + psu_id = psu_match.group(1) + psu_status = psu_match.group(2) + + check_psu_status_sysfs_consistency(dut, psu_id, 
psu_status) + def test_show_platform_psustatus(testbed_devices): """ @summary: Check output of 'show platform psustatus' @@ -45,9 +65,10 @@ def test_show_platform_psustatus(testbed_devices): logging.info("Check PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, ans_host.hostname)) psu_status = ans_host.command(CMD_PLATFORM_PSUSTATUS) - psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK)") + psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") for line in psu_status["stdout_lines"][2:]: assert psu_line_pattern.match(line), "Unexpected PSU status output" + check_vendor_specific_psustatus(ans_host, line) def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): @@ -108,6 +129,7 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): fields = line.split() if fields[2] != "OK": psu_under_test = fields[1] + check_vendor_specific_psustatus(ans_host, line) assert psu_under_test is not None, "No PSU is turned off" logging.info("Turn on PSU %s" % str(psu["psu_id"])) @@ -120,6 +142,7 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): fields = line.split() if fields[1] == psu_under_test: assert fields[2] == "OK", "Unexpected PSU status after turned it on" + check_vendor_specific_psustatus(ans_host, line) psu_test_results[psu_under_test] = True diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index 8278b96c42a..01ee03534ae 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -13,60 +13,126 @@ import time import sys +from datetime import datetime + import pytest from platform_fixtures import conn_graph_facts +from psu_controller import psu_controller from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic -from check_transceiver_status import 
all_transceivers_detected +from check_daemon_status import check_pmon_daemon_status +from check_all_interface_info import check_interface_information + +pytestmark = [pytest.mark.disable_loganalyzer] + +REBOOT_TYPE_WARM = "warm" +REBOOT_TYPE_COLD = "cold" +REBOOT_TYPE_FAST = "fast" +REBOOT_TYPE_POWEROFF = "power off" + +reboot_ctrl_dict = { + REBOOT_TYPE_POWEROFF: { + "timeout": 300, + "cause": "Power Loss" + }, + REBOOT_TYPE_COLD: { + "command": "reboot", + "timeout": 300, + "cause": "reboot" + }, + REBOOT_TYPE_FAST: { + "command": "fast-reboot", + "timeout": 180, + "cause": "fast-reboot" + }, + REBOOT_TYPE_WARM: { + "command": "warm-reboot", + "timeout": 180, + "cause": "warm-reboot" + } +} + + +def check_reboot_cause(dut, reboot_cause_expected): + """ + @summary: Check the reboot cause on DUT. + @param dut: The AnsibleHost object of DUT. + @param reboot_cause_expected: The expected reboot cause. + """ + logging.info("Check the reboot cause") + output = dut.shell("show reboot-cause") + reboot_cause_got = output["stdout"] + logging.debug("show reboot-cause returns {}".format(reboot_cause_got)) + m = re.search(reboot_cause_expected, reboot_cause_got) + assert m is not None, "got reboot-cause %s after rebooted by %s" % (reboot_cause_got, reboot_cause_expected) -def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"): +def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, reboot_helper=None, reboot_kwargs=None): """ Perform the specified type of reboot and check platform status. + @param localhost: The Localhost object. + @param dut: The AnsibleHost object of DUT. + @param interfaces: DUT's interfaces defined by minigraph + @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX. 
+ @param reboot_helper: The helper function used only by power off reboot + @param reboot_kwargs: The argument used by reboot_helper """ logging.info("Run %s reboot on DUT" % reboot_type) - if reboot_type == "cold": - reboot_cmd = "reboot" - reboot_timeout = 300 - elif reboot_type == "fast": - reboot_cmd = "fast-reboot" - reboot_timeout = 180 - elif reboot_type == "warm": - reboot_cmd = "warm-reboot" - reboot_timeout = 180 + + assert reboot_type in reboot_ctrl_dict.keys(), "Unknown reboot type %s" % reboot_type + + reboot_timeout = reboot_ctrl_dict[reboot_type]["timeout"] + reboot_cause = reboot_ctrl_dict[reboot_type]["cause"] + + dut_datetime = datetime.strptime(dut.command('date -u +"%Y-%m-%d %H:%M:%S"')["stdout"], "%Y-%m-%d %H:%M:%S") + + if reboot_type == REBOOT_TYPE_POWEROFF: + assert reboot_helper is not None, "A reboot function must be provided for power off reboot" + + reboot_helper(reboot_kwargs) + + localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120) else: - assert False, "Reboot type %s is not supported" % reboot_type - process, queue = dut.command(reboot_cmd, module_async=True) - - logging.info("Wait for DUT to go down") - res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120, - module_ignore_errors=True) - if "failed" in res: - if process.is_alive(): - logging.error("Command '%s' is not completed" % reboot_cmd) - process.terminate() - logging.error("reboot result %s" % str(queue.get())) - assert False, "DUT did not go down" + reboot_cmd = reboot_ctrl_dict[reboot_type]["command"] + reboot_task, reboot_res = dut.command(reboot_cmd, module_ignore_errors=True, module_async=True) + + logging.info("Wait for DUT to go down") + res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", timeout=180, module_ignore_errors=True) + if "failed" in res: + try: + logging.error("Wait for switch down failed, try to kill any possible stuck reboot task") + pid = dut.command("pgrep -f '%s'" 
% reboot_cmd)["stdout"] + dut.command("kill -9 %s" % pid) + reboot_task.terminate() + logging.error("Result of command '%s': " + str(reboot_res.get(timeout=0))) + except Exception as e: + logging.error("Exception raised while cleanup reboot task and get result: " + repr(e)) logging.info("Wait for DUT to come back") localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout) + logging.info("Check the uptime to verify whether reboot was performed") + dut_uptime = datetime.strptime(dut.command("uptime -s")["stdout"], "%Y-%m-%d %H:%M:%S") + assert float(dut_uptime.strftime("%s")) - float(dut_datetime.strftime("%s")) > 10, "Device did not reboot" + logging.info("Wait until all critical services are fully started") check_critical_services(dut) - logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \ - "Not all transceivers are detected in 300 seconds" + logging.info("Check reboot cause") + check_reboot_cause(dut, reboot_cause) - logging.info("Check interface status") - check_interface_status(dut, interfaces) + logging.info("Wait some time for all the transceivers to be detected") + assert wait_until(300, 20, check_interface_information, dut, interfaces), \ + "Not all transceivers are detected or interfaces are up in 300 seconds" logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) + logging.info("Check pmon daemon status") + assert check_pmon_daemon_status(dut), "Not all pmon daemons running." 
+ if dut.facts["asic_type"] in ["mellanox"]: current_file_dir = os.path.dirname(os.path.realpath(__file__)) @@ -86,7 +152,7 @@ def test_cold_reboot(testbed_devices, conn_graph_facts): ans_host = testbed_devices["dut"] localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="cold") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_COLD) def test_fast_reboot(testbed_devices, conn_graph_facts): @@ -96,7 +162,7 @@ def test_fast_reboot(testbed_devices, conn_graph_facts): ans_host = testbed_devices["dut"] localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="fast") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_FAST) def test_warm_reboot(testbed_devices, conn_graph_facts): @@ -112,4 +178,74 @@ def test_warm_reboot(testbed_devices, conn_graph_facts): if "disabled" in issu_capability: pytest.skip("ISSU is not supported on this DUT, skip this test case") - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="warm") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_WARM) + + +@pytest.fixture(params=[15, 5]) +def power_off_delay(request): + """ + @summary: used to parametrized test cases on power_off_delay + @param request: pytest request object + @return: power_off_delay + """ + return request.param + + +def _power_off_reboot_helper(kwargs): + """ + @summary: used to parametrized test cases on power_off_delay + @param kwargs: the delay time between turning off and on the PSU + """ + psu_ctrl = kwargs["psu_ctrl"] + all_psu = kwargs["all_psu"] + power_on_seq = kwargs["power_on_seq"] + delay_time = kwargs["delay_time"] + + for psu in all_psu: + logging.debug("turning off {}".format(psu)) + psu_ctrl.turn_off_psu(psu["psu_id"]) + time.sleep(delay_time) + 
logging.info("Power on {}".format(power_on_seq)) + for psu in power_on_seq: + logging.debug("turning on {}".format(psu)) + psu_ctrl.turn_on_psu(psu["psu_id"]) + + +def test_power_off_reboot(testbed_devices, conn_graph_facts, psu_controller, power_off_delay): + """ + @summary: This test case is to perform reboot via powercycle and check platform status + @param testbed_devices: Fixture initialize devices in testbed + @param conn_graph_facts: Fixture parse and return lab connection graph + @param psu_controller: The python object of psu controller + @param power_off_delay: Pytest fixture. The delay between turning off and on the PSU + """ + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + + psu_ctrl = psu_controller(ans_host.hostname, ans_host.facts["asic_type"]) + if psu_ctrl is None: + pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % ans_host.hostname) + + all_psu = psu_ctrl.get_psu_status() + + # Purpose of this list is to control sequence of turning on PSUs in power off testing. + # If there are 2 PSUs, then 3 scenarios would be covered: + # 1. Turn off all PSUs, turn on PSU1, then check. + # 2. Turn off all PSUs, turn on PSU2, then check. + # 3. Turn off all PSUs, turn on one of the PSU, then turn on the other PSU, then check. 
+ power_on_seq_list = [] + if all_psu: + power_on_seq_list = [[item] for item in all_psu] + power_on_seq_list.append(all_psu) + + logging.info("Got all power on sequences {}".format(power_on_seq_list)) + + poweroff_reboot_kwargs = {"dut": ans_host} + + for power_on_seq in power_on_seq_list: + poweroff_reboot_kwargs["psu_ctrl"] = psu_ctrl + poweroff_reboot_kwargs["all_psu"] = all_psu + poweroff_reboot_kwargs["power_on_seq"] = power_on_seq + poweroff_reboot_kwargs["delay_time"] = power_off_delay + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], REBOOT_TYPE_POWEROFF, + _power_off_reboot_helper, poweroff_reboot_kwargs) diff --git a/tests/platform/test_reload_config.py b/tests/platform/test_reload_config.py index 047945dfab4..db64ee2b558 100644 --- a/tests/platform/test_reload_config.py +++ b/tests/platform/test_reload_config.py @@ -9,13 +9,14 @@ import os import time import sys +import pytest from platform_fixtures import conn_graph_facts from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic -from check_transceiver_status import all_transceivers_detected +from check_all_interface_info import check_interface_information +pytestmark = [pytest.mark.disable_loganalyzer] def test_reload_configuration(testbed_devices, conn_graph_facts): @@ -33,13 +34,9 @@ def test_reload_configuration(testbed_devices, conn_graph_facts): check_critical_services(ans_host) logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, ans_host, interfaces), \ + assert wait_until(300, 20, check_interface_information, ans_host, interfaces), \ "Not all transceivers are detected in 300 seconds" - logging.info("Check interface status") - time.sleep(60) - check_interface_status(ans_host, interfaces) - logging.info("Check transceiver status") 
check_transceiver_basic(ans_host, interfaces) diff --git a/tests/platform/test_sequential_restart.py b/tests/platform/test_sequential_restart.py index 78f087689fb..84cecf17873 100644 --- a/tests/platform/test_sequential_restart.py +++ b/tests/platform/test_sequential_restart.py @@ -15,9 +15,9 @@ from platform_fixtures import conn_graph_facts from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic -from check_transceiver_status import all_transceivers_detected +from check_all_interface_info import check_interface_information +pytestmark = [pytest.mark.disable_loganalyzer] def restart_service_and_check(localhost, dut, service, interfaces): @@ -32,12 +32,8 @@ def restart_service_and_check(localhost, dut, service, interfaces): check_critical_services(dut) logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \ - "Not all transceivers are detected in 300 seconds" - - logging.info("Check interface status") - time.sleep(60) - check_interface_status(dut, interfaces) + assert wait_until(300, 20, check_interface_information, dut, interfaces), \ + "Not all interface information are detected within 300 seconds" logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py index bba52ad5473..7de20af0042 100644 --- a/tests/platform/test_sfp.py +++ b/tests/platform/test_sfp.py @@ -97,7 +97,9 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): for intf in conn_graph_facts["device_conn"]: reset_result = ans_host.command("%s %s" % (cmd_sfp_reset, intf)) assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf) - time.sleep(120) # Wait some time for SFP to fully recover after reset + time.sleep(5) + 
logging.info("Wait some time for SFP to fully recover after reset") + time.sleep(60) logging.info("Check sfp presence again after reset") sfp_presence = ans_host.command(cmd_sfp_presence) @@ -106,6 +108,12 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + logging.info("Check interface status") + mg_facts = ans_host.minigraph_facts(host=ans_host.hostname)["ansible_facts"] + intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] + assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ + "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) + def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): """ @@ -164,3 +172,9 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): for intf in conn_graph_facts["device_conn"]: assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + + logging.info("Check interface status") + mg_facts = ans_host.minigraph_facts(host=ans_host.hostname)["ansible_facts"] + intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] + assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ + "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) From 8aa1dc686d71ce198699bd4ba0042c7d24f7a908 Mon Sep 17 00:00:00 2001 From: Nazarii Hnydyn Date: Thu, 19 Sep 2019 23:09:34 +0300 Subject: [PATCH 117/218] Align Asym PFC configuration commands. 
(#1122) Signed-off-by: Nazarii Hnydyn --- ansible/roles/test/tasks/pfc_asym.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/pfc_asym.yml b/ansible/roles/test/tasks/pfc_asym.yml index 249f1fa6ff6..d1f0d077694 100644 --- a/ansible/roles/test/tasks/pfc_asym.yml +++ b/ansible/roles/test/tasks/pfc_asym.yml @@ -214,7 +214,7 @@ connection: switch - name: Enable asymmetric PFC on all server interfaces - command: config interface {{ item.dut_name }} pfc asymmetric on + command: config interface pfc asymmetric {{ item.dut_name }} on become: yes with_items: '{{ server_ports }}' @@ -266,7 +266,7 @@ always: - name: Disable asymmetric PFC on all server interfaces - command: config interface {{ item.dut_name }} pfc asymmetric off + command: config interface pfc asymmetric {{ item.dut_name }} off become: yes with_items: '{{ server_ports }}' From 4de6e7acea5da880030cb8018f341f467849fd2f Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 11 Oct 2019 21:54:03 +0800 Subject: [PATCH 118/218] Improve the scripts for preparing PTF interfaces 201811 (#1142) * Improve the scripts for preparing PTF interfaces Signed-off-by: Xin Wang * Limit the operation to interfaces eth%d * Update per review comments --- ansible/roles/test/files/helpers/change_mac.sh | 18 ++++++++++-------- ansible/roles/test/files/helpers/remove_ip.sh | 8 ++++---- tests/scripts/change_mac.sh | 14 +------------- tests/scripts/remove_ip.sh | 11 +---------- 4 files changed, 16 insertions(+), 35 deletions(-) mode change 100644 => 120000 tests/scripts/change_mac.sh mode change 100644 => 120000 tests/scripts/remove_ip.sh diff --git a/ansible/roles/test/files/helpers/change_mac.sh b/ansible/roles/test/files/helpers/change_mac.sh index 21420658f03..64eff8f735d 100644 --- a/ansible/roles/test/files/helpers/change_mac.sh +++ b/ansible/roles/test/files/helpers/change_mac.sh @@ -2,12 +2,14 @@ set -euo pipefail -INTF_LIST=$(ifconfig | grep eth | cut -f 1 -d ' ') - -for i in 
${INTF_LIST}; do - prefix=$(ifconfig $i | grep HWaddr | cut -c39-53) - suffix=$( printf "%02x" ${i##eth}) - mac=$prefix$suffix - echo $i $mac - ifconfig $i hw ether $mac +INTF_LIST=$(ls /sys/class/net | grep -E "^eth[0-9]+$") + +for INTF in ${INTF_LIST}; do + ADDR="$(cat /sys/class/net/${INTF}/address)" + PREFIX="$(cut -c1-15 <<< ${ADDR})" + SUFFIX="$(printf "%02x" ${INTF##eth})" + MAC="${PREFIX}${SUFFIX}" + + echo "Update ${INTF} MAC address: ${ADDR}->$MAC" + ip link set dev ${INTF} address ${MAC} done diff --git a/ansible/roles/test/files/helpers/remove_ip.sh b/ansible/roles/test/files/helpers/remove_ip.sh index 34432772de9..abed39e4dc9 100755 --- a/ansible/roles/test/files/helpers/remove_ip.sh +++ b/ansible/roles/test/files/helpers/remove_ip.sh @@ -2,9 +2,9 @@ set -euo pipefail -INTF_IDX_LIST=$(cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}') +INTF_LIST=$(ls /sys/class/net | grep -E "^eth[0-9]+$") -for i in ${INTF_IDX_LIST}; do - echo "Flush eth${i} IP address" - ip address flush dev eth$i +for INTF in ${INTF_LIST}; do + echo "Flush ${INTF} IP address" + ip addr flush dev ${INTF} done diff --git a/tests/scripts/change_mac.sh b/tests/scripts/change_mac.sh deleted file mode 100644 index 21420658f03..00000000000 --- a/tests/scripts/change_mac.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -INTF_LIST=$(ifconfig | grep eth | cut -f 1 -d ' ') - -for i in ${INTF_LIST}; do - prefix=$(ifconfig $i | grep HWaddr | cut -c39-53) - suffix=$( printf "%02x" ${i##eth}) - mac=$prefix$suffix - echo $i $mac - ifconfig $i hw ether $mac -done diff --git a/tests/scripts/change_mac.sh b/tests/scripts/change_mac.sh new file mode 120000 index 00000000000..9d08802dbb2 --- /dev/null +++ b/tests/scripts/change_mac.sh @@ -0,0 +1 @@ +../../ansible/roles/test/files/helpers/change_mac.sh \ No newline at end of file diff --git a/tests/scripts/remove_ip.sh b/tests/scripts/remove_ip.sh deleted file mode 100644 index 34432772de9..00000000000 --- 
a/tests/scripts/remove_ip.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -INTF_IDX_LIST=$(cat /proc/net/dev | grep eth | awk -F'eth|:' '{print $2}') - -for i in ${INTF_IDX_LIST}; do - echo "Flush eth${i} IP address" - ip address flush dev eth$i -done diff --git a/tests/scripts/remove_ip.sh b/tests/scripts/remove_ip.sh new file mode 120000 index 00000000000..a03b98f4042 --- /dev/null +++ b/tests/scripts/remove_ip.sh @@ -0,0 +1 @@ +../../ansible/roles/test/files/helpers/remove_ip.sh \ No newline at end of file From f523ec5484d84724a86cd849f54efb751cc64e02 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Wed, 9 Oct 2019 17:52:21 -0700 Subject: [PATCH 119/218] Test case 2 of PFC watchdog against warm-reboot: sad path (#834) * First test case of PFC watchdog against warm-reboot Signed-off-by: Wenda Ni * Add more comments for code readability Signed-off-by: Wenda Ni * First test case of PFC watchdog against warm-reboot Signed-off-by: Wenda Ni * Add more comments for code readability Signed-off-by: Wenda Ni * Modify output message Signed-off-by: Wenda Ni * Allow log analyzer to take a specified start marker Signed-off-by: Wenda Ni * Use lookup('pipe', 'date +%H:%M:%S') in place of ansible_date_time.time, which uses cached time for a certain period of time https://github.com/ansible/ansible/issues/22561 Signed-off-by: Wenda Ni * Add the flexiblity to not start storm at fanout link partener in running functional_test_storm.yml Signed-off-by: Wenda Ni * Dump only the current result and summary files for debugging and troubleshooting purpose Signed-off-by: Wenda Ni * Add the capability to check if the number of exact matches is equal to to the target number Signed-off-by: Wenda Ni * Split the actual storm and restore tests into functional_test_storm_perq.yml and functional_test_restore_perq.yml, respectively Add the capability to storm multiple queues of a port Signed-off-by: Wenda Ni * Add test case 2 of PFC watchdog against warm-reboot: PFC storm started 
and detected before warm-reboot On-going storm on warm-reboot emission, and lasts past the warm-reboot finish PFC storm stopped and restored after warm-reboot Signed-off-by: Wenda Ni * Ignore trival syncd ERR during the warm-reboot, e.g., Mar 20 00:40:33.599212 str-a7050-acs-1 ERR syncd#syncd: _brcm_sai_cosq_stat_get:1146 cosq stat get failed with error Invalid parameter (0xfffffffc). Mar 20 00:40:33.599212 str-a7050-acs-1 DEBUG syncd#syncd: brcm_sai_get_queue_stats:724 cosq stat get failed with error -5 for port 1 qid 10 Mar 20 00:40:33.599212 str-a7050-acs-1 NOTICE syncd#syncd: :- setQueueCounterList: Queue oid:0x102150000000b does not has supported counters Signed-off-by: Wenda Ni * Use boolean variable to determine the test run type: regular pfc wd test or pfcwd warm-reboot test Signed-off-by: Wenda Ni * Feed reboot type to reboot_sonic.yml in warm-reboot happy path test Signed-off-by: Wenda Ni * Feed reboot type to reboot_sonic.yml in warm-reboot sad path test Signed-off-by: Wenda Ni * Add expected errors on mlnx platform Signed-off-by: Wenda Ni --- .../functional_test_restore.yml | 88 ++------------- .../functional_test_restore_perq.yml | 81 ++++++++++++++ .../functional_test/functional_test_storm.yml | 105 +++--------------- .../functional_test_storm_perq.yml | 99 +++++++++++++++++ .../functional_test_warm_reboot.yml | 27 +++++ .../functional_test/ignore_pfc_wd_messages | 2 + 6 files changed, 235 insertions(+), 167 deletions(-) create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml index f50a9961ccf..662ef573f63 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml +++ 
b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml @@ -1,5 +1,6 @@ #------------------------------------ # Test the PFC restore action +# Iterate the test over a list of lossless queues # 1. Verify restore is detected via syslog entry analysis on the dut # 2. Verity the forward action on both ingress and egress via ptf dataplane traffic test #------------------------------------ @@ -49,7 +50,7 @@ - name: Prepare variables required for PFC test set_fact: - pfc_queue_index: 4 + pfc_queue_indices: [4] pfc_frames_number: 100000000 pfc_wd_test_pkt_count: 100 pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" @@ -57,8 +58,10 @@ peer_mgmt: "{{device_info['mgmtip']}}" testname: functional_test -- set_fact: - class_enable: "{{(1).__lshift__(pfc_queue_index)}}" +- name: Add queue index 3 to pfc_queue_indices when seed is an odd number + set_fact: + pfc_queue_indices: "{{pfc_queue_indices + [3]}}" + when: seed | int is odd - set_fact: peer_login: "{{switch_login[hwsku_map[peer_hwsku]]}}" @@ -67,76 +70,9 @@ include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml -- block: - # 1. Verify restore is detected via syslog entry analysis on the dut - - set_fact: - test_expect_file: "expect_pfc_wd_restore" - - - name: Initialize loganalyzer - include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml - - - name: Stop PFC storm on fanout switch - action: apswitch template="{{pfc_wd_storm_stop_template}}" - args: - host: "{{peer_mgmt}}" - login: "{{peer_login}}" - connection: switch - - - name: Wait for queue to recover from PFC storm - pause: - seconds: 1 - - - name: Check if logs contain message that PFC WD restored from deadlock - include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml - - - name: Check if logs contain message that PFC WD restored from deadlock - include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml - - # 2. 
Verity the forward action on both ingress and egress via ptf dataplane traffic test - - name: "Send packets via {{pfc_wd_test_port}}" - include: roles/test/tasks/ptf_runner.yml - vars: - ptf_test_name: PFC WD test - ptf_test_dir: ptftests - ptf_test_path: pfc_wd.PfcWdTest - ptf_platform: remote - ptf_platform_dir: ptftests - ptf_test_params: - - testbed_type='{{testbed_type}}' - - router_mac='{{ansible_ethernet0_mac_addr}}' - - queue_index='{{pfc_queue_index}}' - - pkt_count='{{pfc_wd_test_pkt_count}}' - - port_src='{{pfc_wd_rx_port_id[0]}}' - - port_dst='{{pfc_wd_test_port_ids}}' - - ip_dst='{{pfc_wd_test_neighbor_addr}}' - - port_type='{{port_type}}' - - wd_action='forward' - ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " - - - name: "Send packets to {{pfc_wd_test_port}}" - include: roles/test/tasks/ptf_runner.yml - vars: - ptf_test_name: PFC WD test - ptf_test_dir: ptftests - ptf_test_path: pfc_wd.PfcWdTest - ptf_platform: remote - ptf_platform_dir: ptftests - ptf_test_params: - - testbed_type='{{testbed_type}}' - - router_mac='{{ansible_ethernet0_mac_addr}}' - - queue_index='{{pfc_queue_index}}' - - pkt_count='{{pfc_wd_test_pkt_count}}' - - port_src='{{pfc_wd_test_port_id}}' - - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' - - ip_dst='{{pfc_wd_rx_neighbor_addr}}' - - port_type='{{port_type}}' - - wd_action='forward' - ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " - - rescue: - - name: Stop PFC storm on fanout switch - action: apswitch template="{{pfc_wd_storm_stop_template}}" - args: - host: "{{peer_mgmt}}" - login: "{{peer_login}}" - connection: switch +- name: Test PFC restore function per queue + include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml + vars: + pfc_queue_index: "{{item}}" + class_enable: "{{(1).__lshift__(item)}}" + with_items: "{{pfc_queue_indices}}" diff 
--git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml new file mode 100644 index 00000000000..e7b1e751b0d --- /dev/null +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml @@ -0,0 +1,81 @@ +#------------------------------------ +# Test the PFC restore action +# 1. Verify restore is detected via syslog entry analysis on the dut +# 2. Verity the forward action on both ingress and egress via ptf dataplane traffic test +#------------------------------------ + +- block: + # 1. Verify restore is detected via syslog entry analysis on the dut + - set_fact: + test_expect_file: "expect_pfc_wd_restore" + + - name: Initialize loganalyzer + include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + vars: + testname_unique_gen: true + + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + + - name: Allow enough time for the PFC storm restoration to flow into the syslog + pause: + seconds: 1 + + - name: Check if logs contain message that PFC WD restored from deadlock + include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + + - name: Check if logs contain message that PFC WD restored from deadlock + include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + # 2. 
Verity the forward action on both ingress and egress via ptf dataplane traffic test + - name: "Send packets via {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_rx_port_id[0]}}' + - port_dst='{{pfc_wd_test_port_ids}}' + - ip_dst='{{pfc_wd_test_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='forward' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + - name: "Send packets to {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_test_port_id}}' + - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' + - ip_dst='{{pfc_wd_rx_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='forward' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + rescue: + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml index bf0e9015240..10ef88d7c93 100644 --- 
a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml @@ -1,5 +1,6 @@ #------------------------------------ -# Test the PFC storm action +# Test the PFC storm action on a port +# Iterate the test over a list of lossless queues # 1. Verify storm is detected via syslog entry analysis on the dut # 2. Verify the drop action in place on both ingress and egress via ptf dataplane traffic test # @@ -51,7 +52,7 @@ - name: Prepare variables required for PFC test set_fact: - pfc_queue_index: 4 + pfc_queue_indices: [4] pfc_frames_number: 100000000 pfc_wd_test_pkt_count: 100 pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" @@ -59,8 +60,10 @@ peer_mgmt: "{{device_info['mgmtip']}}" testname: functional_test -- set_fact: - class_enable: "{{(1).__lshift__(pfc_queue_index)}}" +- name: Add queue index 3 to pfc_queue_indices when seed is an odd number + set_fact: + pfc_queue_indices: "{{pfc_queue_indices + [3]}}" + when: seed | int is odd - set_fact: peer_login: "{{switch_login[hwsku_map[peer_hwsku]]}}" @@ -69,90 +72,10 @@ include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml -- block: - - set_fact: - pfc_gen_file: pfc_gen.py - - - name: Deploy pfc packet generater file to fanout switch - include: roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml - - - name: copy the test to ptf container - copy: src=roles/test/files/ptftests dest=/root - delegate_to: "{{ptf_host}}" - - # 1. 
Verify storm is detected via syslog entry analysis on the dut - - set_fact: - test_expect_file: "expect_pfc_wd_detect" - test_ignore_file: "ignore_pfc_wd_messages" - - - name: Initialize loganalyzer - include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml - - - name: Generate PFC storm on fanout switch - action: apswitch template="{{pfc_wd_storm_template}}" - args: - host: "{{peer_mgmt}}" - login: "{{peer_login}}" - connection: switch - register: pid - - - debug: msg="{{pid}}" - - - name: Let PFC storm happen for a while - pause: - seconds: 5 - - - name: Check if logs contain message that PFC WD detected storm - include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml - - - name: Check if logs contain message that PFC WD detected storm - include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml - - # 2. Verify the drop action in place on both ingress and egress via ptf dataplane traffic test - - name: "check egress drop, tx port {{pfc_wd_test_port}}" - include: roles/test/tasks/ptf_runner.yml - vars: - ptf_test_name: PFC WD test - ptf_test_dir: ptftests - ptf_test_path: pfc_wd.PfcWdTest - ptf_platform: remote - ptf_platform_dir: ptftests - ptf_test_params: - - testbed_type='{{testbed_type}}' - - router_mac='{{ansible_ethernet0_mac_addr}}' - - queue_index='{{pfc_queue_index}}' - - pkt_count='{{pfc_wd_test_pkt_count}}' - - port_src='{{pfc_wd_rx_port_id[0]}}' - - port_dst='[{{pfc_wd_test_port_id}}]' - - ip_dst='{{pfc_wd_test_neighbor_addr}}' - - port_type='{{port_type}}' - - wd_action='drop' - ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " - - - name: "Check ingress drop, rx port {{pfc_wd_test_port}}" - include: roles/test/tasks/ptf_runner.yml - vars: - ptf_test_name: PFC WD test - ptf_test_dir: ptftests - ptf_test_path: pfc_wd.PfcWdTest - ptf_platform: remote - ptf_platform_dir: ptftests - ptf_test_params: - - testbed_type='{{testbed_type}}' - - 
router_mac='{{ansible_ethernet0_mac_addr}}' - - queue_index='{{pfc_queue_index}}' - - pkt_count='{{pfc_wd_test_pkt_count}}' - - port_src='{{pfc_wd_test_port_id}}' - - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' - - ip_dst='{{pfc_wd_rx_neighbor_addr}}' - - port_type='{{port_type}}' - - wd_action='drop' - ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " - - rescue: - - name: Stop PFC storm on fanout switch - action: apswitch template="{{pfc_wd_storm_stop_template}}" - args: - host: "{{peer_mgmt}}" - login: "{{peer_login}}" - connection: switch +- name: Test PFC storm function per queue + include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml + vars: + pfc_queue_index: "{{item}}" + class_enable: "{{(1).__lshift__(item)}}" + expected_matches_target: "{% if total_test_ports_num is defined %}{{(total_test_ports_num | int) * (pfc_queue_indices | length)}}{% endif %}" + with_items: "{{pfc_queue_indices}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml new file mode 100644 index 00000000000..5453542773a --- /dev/null +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml @@ -0,0 +1,99 @@ +#------------------------------------ +# Test the PFC storm action +# 1. Verify storm is detected via syslog entry analysis on the dut +# 2. 
Verify the drop action in place on both ingress and egress via ptf dataplane traffic test +# +# Storm will not be stopped on exiting the yml if test success +#------------------------------------ + +- block: + - set_fact: + pfc_gen_file: pfc_gen.py + + - name: Deploy pfc packet generater file to fanout switch + include: roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml + + - name: copy the test to ptf container + copy: src=roles/test/files/ptftests dest=/root + delegate_to: "{{ptf_host}}" + + # 1. Verify storm is detected via syslog entry analysis on the dut + - set_fact: + test_expect_file: "expect_pfc_wd_detect" + test_ignore_file: "ignore_pfc_wd_messages" + + - name: Initialize loganalyzer + include: roles/test/files/tools/loganalyzer/loganalyzer_init.yml + vars: + testname_unique_gen: true + + - name: Allow enough time for the start marker to flow into the syslog + pause: + seconds: 5 + + - name: Generate PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + when: start_storm is not defined or start_storm == true + + - name: Allow enough time for the PFC storm detection to flow into the syslog + pause: + seconds: 5 + + - name: Check if logs contain message that PFC WD detected storm + include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml + + - name: Check if logs contain message that PFC WD detected storm + include: roles/test/files/tools/loganalyzer/loganalyzer_end.yml + + # 2. 
Verify the drop action in place on both ingress and egress via ptf dataplane traffic test + - name: "check egress drop, tx port {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_rx_port_id[0]}}' + - port_dst='[{{pfc_wd_test_port_id}}]' + - ip_dst='{{pfc_wd_test_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='drop' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + - name: "Check ingress drop, rx port {{pfc_wd_test_port}}" + include: roles/test/tasks/ptf_runner.yml + vars: + ptf_test_name: PFC WD test + ptf_test_dir: ptftests + ptf_test_path: pfc_wd.PfcWdTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - testbed_type='{{testbed_type}}' + - router_mac='{{ansible_ethernet0_mac_addr}}' + - queue_index='{{pfc_queue_index}}' + - pkt_count='{{pfc_wd_test_pkt_count}}' + - port_src='{{pfc_wd_test_port_id}}' + - port_dst='[{{pfc_wd_rx_port_id | join(' ')}}]' + - ip_dst='{{pfc_wd_rx_neighbor_addr}}' + - port_type='{{port_type}}' + - wd_action='drop' + ptf_extra_options: "--relax --debug info --log-file /tmp/pfc_wd.PfcWdTest.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}.log " + + rescue: + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml index 644a9add423..fd11463411b 100644 --- 
a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml @@ -23,3 +23,30 @@ - name: Test PFC WD restore function include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml with_dict: "{{select_test_ports}}" + + +# Test case 2: +# PFC storm started and detected before warm-reboot +# On-going storm on warm-reboot emission, and lasts past the warm-reboot finish +# PFC storm stopped and restored after warm-reboot +- block: + - name: Test PFC WD storm function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml + with_dict: "{{select_test_ports}}" + + - name: Issue warm reboot on the device + include: roles/test/tasks/common_tasks/reboot_sonic.yml + vars: + reboot_type: warm-reboot + + - name: Test PFC WD storm function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml + vars: + start_storm: false + start_marker: "NOTICE swss#orchagent: :- setWarmStartState: orchagent warm start state changed to initialized" + total_test_ports_num: "{{select_test_ports | length}}" + with_dict: "{{select_test_ports}}" + + - name: Test PFC WD restore function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml + with_dict: "{{select_test_ports}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages index 391d16ed990..7cff6813972 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages @@ -7,3 +7,5 @@ r, ".* snmp.*" r, ".* Trying to remove nonexisting queue from flex counter .*" r, ".* SAI_STATUS_BUFFER_OVERFLOW" r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" +r, ".* ERR syncd.*" +r, ".* syncd .* ERROR +HOST_INTERFACE" From 
4f69429638d163169f4f1851723d82f6d60b587c Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Mon, 14 Oct 2019 15:11:53 -0700 Subject: [PATCH 120/218] Test case 3 of PFC watchdog against warm-reboot: random storming (#837) * First test case of PFC watchdog against warm-reboot Signed-off-by: Wenda Ni * Add more comments for code readability Signed-off-by: Wenda Ni * Modify output message Signed-off-by: Wenda Ni * Allow log analyzer to take a specified start marker Signed-off-by: Wenda Ni * Use lookup('pipe', 'date +%H:%M:%S') in place of ansible_date_time.time, which uses cached time for a certain period of time https://github.com/ansible/ansible/issues/22561 Signed-off-by: Wenda Ni * Add the flexiblity to not start storm at fanout link partener in running functional_test_storm.yml Signed-off-by: Wenda Ni * Dump only the current result and summary files for debugging and troubleshooting purpose Signed-off-by: Wenda Ni * Add the capability to check if the number of exact matches is equal to to the target number Signed-off-by: Wenda Ni * Split the actual storm and restore tests into functional_test_storm_perq.yml and functional_test_restore_perq.yml, respectively Add the capability to storm multiple queues of a port Signed-off-by: Wenda Ni * Add test case 2 of PFC watchdog against warm-reboot: PFC storm started and detected before warm-reboot On-going storm on warm-reboot emission, and lasts past the warm-reboot finish PFC storm stopped and restored after warm-reboot Signed-off-by: Wenda Ni * Ignore trival syncd ERR during the warm-reboot, e.g., Mar 20 00:40:33.599212 str-a7050-acs-1 ERR syncd#syncd: _brcm_sai_cosq_stat_get:1146 cosq stat get failed with error Invalid parameter (0xfffffffc). 
Mar 20 00:40:33.599212 str-a7050-acs-1 DEBUG syncd#syncd: brcm_sai_get_queue_stats:724 cosq stat get failed with error -5 for port 1 qid 10 Mar 20 00:40:33.599212 str-a7050-acs-1 NOTICE syncd#syncd: :- setQueueCounterList: Queue oid:0x102150000000b does not has supported counters Signed-off-by: Wenda Ni * Run apswitch action asynchronously Using include asynchronously with with_items not supported From * Add the flexiblity to defer storm start and stop at fanout Signed-off-by: Wenda Ni * Randomly generate deferred time Signed-off-by: Wenda Ni * Move actual storming ops to per queue Signed-off-by: Wenda Ni * Clean debugging symbols Signed-off-by: Wenda Ni * Test cast 3 of PFC watchdog against warm-reboot PFC storm asynchronously starts at a random time and lasts a random period at fanout Warm-reboot emission Wait for all the PFC storms to finish Verify PFC storm detection and restoration functional Signed-off-by: Wenda Ni * Specify reboot type to be 'warm-reboot' Signed-off-by: Wenda Ni --- .../functional_test_restore.yml | 2 +- .../functional_test/functional_test_storm.yml | 2 +- .../functional_test_warm_reboot.yml | 23 ++++++++++++ .../functional_test/storm_from_neighbor.yml | 37 +++++++++++++++++++ .../storm_from_neighbor_perq.yml | 32 ++++++++++++++++ 5 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor.yml create mode 100644 ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor_perq.yml diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml index 662ef573f63..249a0a17f00 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml @@ -70,7 +70,7 @@ include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml -- name: Test PFC 
restore function per queue +- name: Test PFC restore function per pg/queue include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml vars: pfc_queue_index: "{{item}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml index 10ef88d7c93..29523e14493 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml @@ -72,7 +72,7 @@ include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml -- name: Test PFC storm function per queue +- name: Test PFC storm function per pg/queue include: roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml vars: pfc_queue_index: "{{item}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml index fd11463411b..b96f032ff2b 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml @@ -50,3 +50,26 @@ - name: Test PFC WD restore function include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml with_dict: "{{select_test_ports}}" + +# Test case 3: +# PFC storm asynchronously starts at a random time and lasts a random period at fanout +# Warm-reboot emission +# Wait for all the PFC storms to finish +# Verify PFC storm detection and restoration functional +- block: + - name: PFC storming from fanout switch + include: roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor.yml + with_dict: "{{select_test_ports}}" + + - name: Issue warm reboot on the device + include: roles/test/tasks/common_tasks/reboot_sonic.yml + vars: + reboot_type: warm-reboot + + - name: Test PFC WD storm function + include: 
roles/test/tasks/pfc_wd/functional_test/functional_test_storm.yml + with_dict: "{{select_test_ports}}" + + - name: Test PFC WD restore function + include: roles/test/tasks/pfc_wd/functional_test/functional_test_restore.yml + with_dict: "{{select_test_ports}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor.yml new file mode 100644 index 00000000000..f6afa7d4a34 --- /dev/null +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor.yml @@ -0,0 +1,37 @@ +- name: Set fact for item values + set_fact: + peer_device: "{{item.value.peer_device}}" + pfc_wd_test_port: "{{item.key}}" + +- conn_graph_facts: host={{ peer_device }} + connection: local + become: no + +- name: Prepare parameters required for PFC storming + set_fact: + pfc_queue_indices: [4] + pfc_frames_number: 100000000 + pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" + peer_hwsku: "{{device_info['HwSku']}}" + peer_mgmt: "{{device_info['mgmtip']}}" + +- name: Add queue index 3 to pfc_queue_indices when seed is an odd number + set_fact: + pfc_queue_indices: "{{pfc_queue_indices + [3]}}" + when: seed | int is odd + +- set_fact: + pfc_gen_file: pfc_gen.py + +- name: Deploy pfc packet generater file to fanout switch + include: roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml + +- set_fact: + peer_login: "{{switch_login[hwsku_map[peer_hwsku]]}}" + + +- name: PFC storming from fanout switch per queue + include: roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor_perq.yml + vars: + pfc_queue_index: "{{item}}" + with_items: "{{pfc_queue_indices}}" diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor_perq.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor_perq.yml new file mode 100644 index 00000000000..61ed3d00dd1 --- /dev/null +++ 
b/ansible/roles/test/tasks/pfc_wd/functional_test/storm_from_neighbor_perq.yml @@ -0,0 +1,32 @@ +- block: + - name: set pfc storm templates based on fanout platform sku + include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml + + - set_fact: + storm_defer_time: "{{120 | random()}}" + + - name: Generate PFC storm on fanout switch with defer time {{pfc_storm_defer_time}}s + action: apswitch template="{{pfc_wd_storm_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + vars: + pfc_storm_defer_time: "{{storm_defer_time}}" + + - name: Stop PFC storm on fanout switch with defer time {{pfc_storm_stop_defer_time}}s + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch + vars: + pfc_storm_stop_defer_time: "{{125 | random(start=(storm_defer_time | int + 5))}}" + + rescue: + - name: Stop PFC storm on fanout switch + action: apswitch template="{{pfc_wd_storm_stop_template}}" + args: + host: "{{peer_mgmt}}" + login: "{{peer_login}}" + connection: switch From 07003d0c28c20f6891c970036c752bd95002c3a6 Mon Sep 17 00:00:00 2001 From: lguohan Date: Fri, 27 Sep 2019 16:35:03 +0200 Subject: [PATCH 121/218] Merge pull request #1119 from wendani/qos_sai_master Upstream QoS SAI test --- ansible/library/testing_port_ip_facts.py | 70 + .../files/brcm/64_interface_to_front_map.ini | 65 + .../brcm/d108c8_interface_to_front_map.ini | 121 ++ .../mlnx/default_interface_to_front_map.ini | 64 +- .../roles/test/files/mlnx/packets_aging.py | 91 + .../test/files/saitests/sai_qos_tests.py | 1885 +++++++++++++++++ ansible/roles/test/files/saitests/switch.py | 79 +- .../test/tasks/qos_get_max_buff_size.yml | 146 ++ ansible/roles/test/tasks/qos_get_ports.yml | 343 +++ ansible/roles/test/tasks/qos_sai.yml | 604 ++++++ ansible/roles/test/tasks/qos_sai_ptf.yml | 16 + .../roles/test/templates/qos_lossy_profile.j2 | 24 + .../roles/test/templates/qos_pfc_profile.j2 
| 22 + ansible/roles/test/vars/testcases.yml | 4 + ansible/vars/qos.yml | 712 +++++++ 15 files changed, 4211 insertions(+), 35 deletions(-) create mode 100644 ansible/library/testing_port_ip_facts.py create mode 100644 ansible/roles/test/files/brcm/64_interface_to_front_map.ini create mode 100644 ansible/roles/test/files/brcm/d108c8_interface_to_front_map.ini create mode 100755 ansible/roles/test/files/mlnx/packets_aging.py create mode 100644 ansible/roles/test/files/saitests/sai_qos_tests.py create mode 100644 ansible/roles/test/tasks/qos_get_max_buff_size.yml create mode 100644 ansible/roles/test/tasks/qos_get_ports.yml create mode 100644 ansible/roles/test/tasks/qos_sai.yml create mode 100644 ansible/roles/test/tasks/qos_sai_ptf.yml create mode 100644 ansible/roles/test/templates/qos_lossy_profile.j2 create mode 100644 ansible/roles/test/templates/qos_pfc_profile.j2 create mode 100644 ansible/vars/qos.yml diff --git a/ansible/library/testing_port_ip_facts.py b/ansible/library/testing_port_ip_facts.py new file mode 100644 index 00000000000..a7c2fa71a9a --- /dev/null +++ b/ansible/library/testing_port_ip_facts.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +import netaddr + +DOCUMENTATION = ''' +--- +module: testing_port_ip_facts +version_added: "1.1" +author: Wenda Ni (wenni@microsoft.com) +short_description: Retrive bgp peer ip facts +description: + - Retrieve bgp peer ip for the ptf interfaces, indexed in testing_ports_id. + The ips are to be used as the src or dst port ips in ptf-generated packets. +options: + testing_ports_id: + description: a sublist of ptf_interfaces. 
+ required: true + dut_switch_ports: + description: a list + required: true + minigraph_bgp: + description: a list + required: true + minigraph_neighbors: + description: a map + required: true +''' + +EXAMPLES = ''' +Retrieve bgp peer ips +- name: Get testing port IPs + testing_port_ip_facts: + testing_ports_id: "{{ testing_ports_id }}" + dut_switch_ports: "{{ dut_switch_ports }}" + minigraph_bgp: "{{ minigraph_bgp }}" + minigraph_neighbors: "{{ minigraph_neighbors }}" + connection: local +''' + + +def main(): + module = AnsibleModule( + argument_spec=dict( + testing_ports_id=dict(required=True), + dut_switch_ports=dict(required=True), + minigraph_bgp=dict(required=True), + minigraph_neighbors=dict(required=True), + ), + supports_check_mode=True + ) + + m_args = module.params + testing_ports_id = m_args['testing_ports_id'] + dut_switch_ports = m_args['dut_switch_ports'] + minigraph_bgp = m_args['minigraph_bgp'] + minigraph_neighbors = m_args['minigraph_neighbors'] + + testing_ports_ip = {} + + for port_id in testing_ports_id: + for peer in minigraph_bgp: + if peer['name'] == minigraph_neighbors[dut_switch_ports[int(port_id)]]['name'] and netaddr.valid_ipv4(peer['addr']): + testing_ports_ip[port_id] = peer['addr'] + break + + module.exit_json(ansible_facts={'testing_ports_ip': testing_ports_ip}) + +from ansible.module_utils.basic import * +if __name__== "__main__": + main() + diff --git a/ansible/roles/test/files/brcm/64_interface_to_front_map.ini b/ansible/roles/test/files/brcm/64_interface_to_front_map.ini new file mode 100644 index 00000000000..5e37187d106 --- /dev/null +++ b/ansible/roles/test/files/brcm/64_interface_to_front_map.ini @@ -0,0 +1,65 @@ +# ptf host interface @ switch front port name +0@Ethernet0 +1@Ethernet1 +2@Ethernet2 +3@Ethernet3 +4@Ethernet4 +5@Ethernet5 +6@Ethernet6 +7@Ethernet7 +8@Ethernet8 +9@Ethernet9 +10@Ethernet10 +11@Ethernet11 +12@Ethernet12 +13@Ethernet13 +14@Ethernet14 +15@Ethernet15 +16@Ethernet16 +17@Ethernet17 +18@Ethernet18 
+19@Ethernet19 +20@Ethernet20 +21@Ethernet21 +22@Ethernet22 +23@Ethernet23 +24@Ethernet24 +25@Ethernet25 +26@Ethernet26 +27@Ethernet27 +28@Ethernet28 +29@Ethernet29 +30@Ethernet30 +31@Ethernet31 +32@Ethernet32 +33@Ethernet33 +34@Ethernet34 +35@Ethernet35 +36@Ethernet36 +37@Ethernet37 +38@Ethernet38 +39@Ethernet39 +40@Ethernet40 +41@Ethernet41 +42@Ethernet42 +43@Ethernet43 +44@Ethernet44 +45@Ethernet45 +46@Ethernet46 +47@Ethernet47 +48@Ethernet48 +49@Ethernet49 +50@Ethernet50 +51@Ethernet51 +52@Ethernet52 +53@Ethernet53 +54@Ethernet54 +55@Ethernet55 +56@Ethernet56 +57@Ethernet57 +58@Ethernet58 +59@Ethernet59 +60@Ethernet60 +61@Ethernet61 +62@Ethernet62 +63@Ethernet63 diff --git a/ansible/roles/test/files/brcm/d108c8_interface_to_front_map.ini b/ansible/roles/test/files/brcm/d108c8_interface_to_front_map.ini new file mode 100644 index 00000000000..f43acaf4e3b --- /dev/null +++ b/ansible/roles/test/files/brcm/d108c8_interface_to_front_map.ini @@ -0,0 +1,121 @@ +# ptf host interface @ switch front port name +0@Ethernet0 +1@Ethernet2 +2@Ethernet4 +3@Ethernet6 +4@Ethernet8 +5@Ethernet10 +6@Ethernet12 +7@Ethernet14 +8@Ethernet16 +9@Ethernet18 +10@Ethernet20 +11@Ethernet22 +12@Ethernet24 +13@Ethernet26 +14@Ethernet28 +15@Ethernet30 +16@Ethernet32 +17@Ethernet34 +18@Ethernet36 +19@Ethernet38 +20@Ethernet40 +21@Ethernet42 +22@Ethernet44 +23@Ethernet46 +24@Ethernet48 +25@Ethernet52 +26@Ethernet56 +27@Ethernet60 +28@Ethernet64 +29@Ethernet68 +30@Ethernet72 +31@Ethernet76 +32@Ethernet80 +33@Ethernet82 +34@Ethernet84 +35@Ethernet86 +36@Ethernet88 +37@Ethernet90 +38@Ethernet92 +39@Ethernet94 +40@Ethernet96 +41@Ethernet98 +42@Ethernet100 +43@Ethernet102 +44@Ethernet104 +45@Ethernet106 +46@Ethernet108 +47@Ethernet110 +48@Ethernet112 +49@Ethernet114 +50@Ethernet116 +51@Ethernet118 +52@Ethernet120 +53@Ethernet122 +54@Ethernet124 +55@Ethernet126 +56@Ethernet128 +57@Ethernet130 +58@Ethernet132 +59@Ethernet134 +60@Ethernet136 +61@Ethernet138 +62@Ethernet140 +63@Ethernet142 
+64@Ethernet144 +65@Ethernet146 +66@Ethernet148 +67@Ethernet150 +68@Ethernet152 +69@Ethernet154 +70@Ethernet156 +71@Ethernet158 +72@Ethernet160 +73@Ethernet162 +74@Ethernet164 +75@Ethernet166 +76@Ethernet168 +77@Ethernet170 +78@Ethernet172 +79@Ethernet174 +80@Ethernet176 +81@Ethernet178 +82@Ethernet180 +83@Ethernet182 +84@Ethernet184 +85@Ethernet186 +86@Ethernet188 +87@Ethernet190 +88@Ethernet192 +89@Ethernet194 +90@Ethernet196 +91@Ethernet198 +92@Ethernet200 +93@Ethernet202 +94@Ethernet204 +95@Ethernet206 +96@Ethernet208 +97@Ethernet210 +98@Ethernet212 +99@Ethernet214 +100@Ethernet216 +101@Ethernet218 +102@Ethernet220 +103@Ethernet222 +104@Ethernet224 +105@Ethernet226 +106@Ethernet228 +107@Ethernet230 +108@Ethernet232 +109@Ethernet234 +110@Ethernet236 +111@Ethernet238 +112@Ethernet240 +113@Ethernet242 +114@Ethernet244 +115@Ethernet246 +116@Ethernet248 +117@Ethernet250 +118@Ethernet252 +119@Ethernet254 diff --git a/ansible/roles/test/files/mlnx/default_interface_to_front_map.ini b/ansible/roles/test/files/mlnx/default_interface_to_front_map.ini index 8ca7d1a774d..0db110025b8 100644 --- a/ansible/roles/test/files/mlnx/default_interface_to_front_map.ini +++ b/ansible/roles/test/files/mlnx/default_interface_to_front_map.ini @@ -1,33 +1,33 @@ # ptf host interface @ switch front port name -0@Ethernet1 -1@Ethernet2 -2@Ethernet3 -3@Ethernet4 -4@Ethernet5 -5@Ethernet6 -6@Ethernet7 -7@Ethernet8 -8@Ethernet9 -9@Ethernet10 -10@Ethernet11 -11@Ethernet12 -12@Ethernet13 -13@Ethernet14 -14@Ethernet15 -15@Ethernet16 -16@Ethernet17 -17@Ethernet18 -18@Ethernet19 -19@Ethernet20 -20@Ethernet21 -21@Ethernet22 -22@Ethernet23 -23@Ethernet24 -24@Ethernet25 -25@Ethernet26 -26@Ethernet27 -27@Ethernet28 -28@Ethernet29 -29@Ethernet30 -30@Ethernet31 -31@Ethernet32 \ No newline at end of file +0@Ethernet0 +1@Ethernet4 +2@Ethernet8 +3@Ethernet12 +4@Ethernet16 +5@Ethernet20 +6@Ethernet24 +7@Ethernet28 +8@Ethernet32 +9@Ethernet36 +10@Ethernet40 +11@Ethernet44 +12@Ethernet48 +13@Ethernet52 
+14@Ethernet56 +15@Ethernet60 +16@Ethernet64 +17@Ethernet68 +18@Ethernet72 +19@Ethernet76 +20@Ethernet80 +21@Ethernet84 +22@Ethernet88 +23@Ethernet92 +24@Ethernet96 +25@Ethernet100 +26@Ethernet104 +27@Ethernet108 +28@Ethernet112 +29@Ethernet116 +30@Ethernet120 +31@Ethernet124 diff --git a/ansible/roles/test/files/mlnx/packets_aging.py b/ansible/roles/test/files/mlnx/packets_aging.py new file mode 100755 index 00000000000..584962a3d26 --- /dev/null +++ b/ansible/roles/test/files/mlnx/packets_aging.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +''' +This file contains Python script to enable/disable packets aging in queues(buffers?). +''' + +import sys, errno +import os +import argparse +from python_sdk_api.sx_api import (SX_STATUS_SUCCESS, + sx_api_open, + sx_api_port_device_get, + sx_api_port_sll_set, + sx_api_port_sll_get, + sx_api_port_hll_set, + new_sx_port_attributes_t_arr, + new_uint32_t_p, + uint32_t_p_assign, + uint32_t_p_value, + sx_port_attributes_t_arr_getitem) + +parser = argparse.ArgumentParser(description='Toggle Mellanox-specific packet aging on egress queues') +parser.add_argument('command', choices=['enable', 'disable'], type=str, help='Enable/Disable packet aging') +args = parser.parse_args() + +# Open SDK +rc, handle = sx_api_open(None) +if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "Failed to open api handle.\nPlease check that SDK is running." + sys.exit(errno.EACCES) + +# Get list of ports +port_attributes_list = new_sx_port_attributes_t_arr(64) +port_cnt_p = new_uint32_t_p() +uint32_t_p_assign(port_cnt_p, 64) + +rc = sx_api_port_device_get(handle, 1 , 0, port_attributes_list, port_cnt_p) +if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "An error returned by sx_api_port_device_get." 
+ sys.exit() +port_cnt = uint32_t_p_value(port_cnt_p) + +set_mode = False +if args.command == "enable": # enable packets aging + sll_time = 0x418937 + hll_time = 0x83127 + hll_stall = 7 + set_mode = True +else: + assert args.command == "disable" # disable packets aging + sll_time = 0xffffffffffffffff + hll_time = 0xffffffff + hll_stall = 0 + set_mode = True + +if set_mode: + rc = sx_api_port_sll_set(handle, sll_time) + if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "An error returned by sx_api_port_sll_set." + sys.exit() +else: + sll_p = new_uint64_t_p() + rc = sx_api_port_sll_get(handle, sll_p) + if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "An error returned by sx_api_port_sll_get." + sys.exit() + else: + sll = uint64_t_p_value(sll_p) + print >> sys.stderr, ("sll_max_time=0x%X" % sll) + +for i in range(0, port_cnt): + port_attributes = sx_port_attributes_t_arr_getitem(port_attributes_list,i) + log_port = int(port_attributes.log_port) + if log_port < 0xFFFFF: # only physical ports + if set_mode: + rc = sx_api_port_hll_set(handle, log_port, hll_time, hll_stall) + if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "An error returned by sx_api_port_hll_set." + sys.exit() + else: + hll_max_time_p = new_uint32_t_p() + hll_stall_cnt_p = new_uint32_t_p() + rc = sx_api_port_hll_get(handle,log_port, hll_max_time_p, hll_stall_cnt_p) + if (rc != SX_STATUS_SUCCESS): + print >> sys.stderr, "An error returned by sx_api_port_hll_set." 
+ sys.exit() + else: + hll_max_time = uint32_t_p_value(hll_max_time_p) + hll_stall_cnt = uint32_t_p_value(hll_stall_cnt_p) + print >> sys.stderr, ("Port%d(Ethernet%d, logical:0x%X) hll_time:0x%X, hll_stall:0x%X" % + (port_attributes.port_mapping.module_port, (port_attributes.port_mapping.module_port * 4), + log_port, hll_max_time, hll_stall_cnt)) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py new file mode 100644 index 00000000000..6e7eb0dbb16 --- /dev/null +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -0,0 +1,1885 @@ +""" +SONiC Dataplane Qos tests +""" + +import time +import logging +import ptf.packet as scapy +import socket +import ptf.dataplane as dataplane +import sai_base_test +import operator +import sys +from ptf.testutils import (ptf_ports, + simple_arp_packet, + send_packet, + simple_tcp_packet, + simple_qinq_tcp_packet) +from ptf.mask import Mask +from switch import (switch_init, + sai_thrift_create_scheduler_profile, + sai_thrift_clear_all_counters, + sai_thrift_read_port_counters, + sai_port_list, + port_list, + sai_thrift_read_port_watermarks, + sai_thrift_read_pg_counters, + sai_thrift_read_buffer_pool_watermark) +from switch_sai_thrift.ttypes import (sai_thrift_attribute_value_t, + sai_thrift_attribute_t) +from switch_sai_thrift.sai_headers import (SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, + SAI_PORT_ATTR_PKT_TX_ENABLE) + +# Counters +# The index number comes from the append order in sai_thrift_read_port_counters +EGRESS_DROP = 0 +INGRESS_DROP = 1 +PFC_PRIO_3 = 5 +PFC_PRIO_4 = 6 +TRANSMITTED_OCTETS = 10 +TRANSMITTED_PKTS = 11 +QUEUE_0 = 0 +QUEUE_1 = 1 +QUEUE_2 = 2 +QUEUE_3 = 3 +QUEUE_4 = 4 +QUEUE_5 = 5 +QUEUE_6 = 6 +PG_NUM = 8 +QUEUE_NUM = 8 + +# Constants +STOP_PORT_MAX_RATE = 1 +RELEASE_PORT_MAX_RATE = 0 +ECN_INDEX_IN_HEADER = 53 # Fits the ptf hex_dump_buffer() parse function +DSCP_INDEX_IN_HEADER = 52 # Fits the ptf hex_dump_buffer() parse function + + +class 
ARPpopulate(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + router_mac = self.test_params['router_mac'] + # ARP Populate + index = 0 + for port in ptf_ports(): + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.dataplane.get_mac(port[0],port[1]), + arp_op=1, + ip_snd='10.0.0.%d' % (index * 2 + 1), + ip_tgt='10.0.0.%d' % (index * 2), + hw_snd=self.dataplane.get_mac(port[0], port[1]), + hw_tgt='00:00:00:00:00:00') + send_packet(self, port[1], arpreq_pkt) + index += 1 + +class ReleaseAllPorts(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + asic_type = self.test_params['sonic_asic_type'] + + if asic_type == 'mellanox': + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + + for port in sai_port_list: + self.client.sai_thrift_set_port_attribute(port, attr) + +# DSCP to queue mapping +class DscpMappingPB(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + router_mac = self.test_params['router_mac'] + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + exp_ip_id = 101 + exp_ttl = 63 + + 
# Get a snapshot of counter values + # port_results is not of our interest here + port_results, queue_results_base = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + + # DSCP Mapping test + try: + for dscp in range(0, 64): + tos = (dscp << 2) + tos |= 1 + pkt = simple_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=exp_ttl + 1 if router_mac != '' else exp_ttl) + send_packet(self, src_port_id, pkt, 1) + print >> sys.stderr, "dscp: %d, calling send_packet()" % (tos >> 2) + + cnt = 0 + dscp_received = False + while not dscp_received: + result = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=3) + if isinstance(result, self.dataplane.PollFailure): + self.fail("Expected packet was not received on port %d. Total received: %d.\n%s" % (dst_port_id, cnt, result.format())) + recv_pkt = scapy.Ether(result.packet) + cnt += 1 + + # Verify dscp flag + try: + if (recv_pkt.payload.tos == tos) and (recv_pkt.payload.src == src_port_ip) and (recv_pkt.payload.dst == dst_port_ip) and \ + (recv_pkt.payload.ttl == exp_ttl) and (recv_pkt.payload.id == exp_ip_id): + dscp_received = True + print >> sys.stderr, "dscp: %d, total received: %d" % (tos >> 2, cnt) + except AttributeError: + print >> sys.stderr, "dscp: %d, total received: %d, attribute error!" 
% (tos >> 2, cnt) + continue + + # Read Counters + port_results, queue_results = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + + print >> sys.stderr, map(operator.sub, queue_results, queue_results_base) + # According to SONiC configuration all dscp are classified to queue 1 except: + # dscp 8 -> queue 0 + # dscp 5 -> queue 2 + # dscp 3 -> queue 3 + # dscp 4 -> queue 4 + # dscp 46 -> queue 5 + # dscp 48 -> queue 6 + # So for the 64 pkts sent the mapping should be -> 58 queue 1, and 1 for queue0, queue2, queue3, queue4, queue5, and queue6 + # Check results + assert(queue_results[QUEUE_0] == 1 + queue_results_base[QUEUE_0]) + assert(queue_results[QUEUE_1] == 58 + queue_results_base[QUEUE_1]) + assert(queue_results[QUEUE_2] == 1 + queue_results_base[QUEUE_2]) + assert(queue_results[QUEUE_3] == 1 + queue_results_base[QUEUE_3]) + assert(queue_results[QUEUE_4] == 1 + queue_results_base[QUEUE_4]) + assert(queue_results[QUEUE_5] == 1 + queue_results_base[QUEUE_5]) + assert(queue_results[QUEUE_6] == 1 + queue_results_base[QUEUE_6]) + + finally: + print >> sys.stderr, "END OF TEST" + +# DOT1P to queue mapping +class Dot1pToQueueMapping(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + vlan_id = int(self.test_params['vlan_id']) + + exp_ip_id = 
102 + exp_ttl = 63 + + # According to SONiC configuration dot1ps are classified as follows: + # dot1p 0 -> queue 0 + # dot1p 1 -> queue 6 + # dot1p 2 -> queue 5 + # dot1p 3 -> queue 3 + # dot1p 4 -> queue 4 + # dot1p 5 -> queue 2 + # dot1p 6 -> queue 1 + # dot1p 7 -> queue 0 + queue_dot1p_map = { + 0 : [0, 7], + 1 : [6], + 2 : [5], + 3 : [3], + 4 : [4], + 5 : [2], + 6 : [1] + } + print >> sys.stderr, queue_dot1p_map + + try: + for queue, dot1ps in queue_dot1p_map.items(): + port_results, queue_results_base = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + + # send pkts with dot1ps that map to the same queue + for dot1p in dot1ps: + # ecn marked + tos = 1 + # Note that vlan tag can be stripped by a switch. + # To embrace this situation, we assemble a q-in-q double-tagged packet, + # and write the dot1p info into both vlan tags so that + # when we receive the packet we do not need to make any assumption + # on whether the outer tag is stripped by the switch or not, or + # more importantly, we do not need to care about, as in the single-tagged + # case, whether the immediate payload is the vlan tag or the ip + # header to determine the valid fields for receive validation + # purpose. 
With a q-in-q packet, we are sure that the next layer of + # header in either switching behavior case is still a vlan tag + pkt = simple_qinq_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + dl_vlan_outer=vlan_id, + dl_vlan_pcp_outer=dot1p, + vlan_vid=vlan_id, + vlan_pcp=dot1p, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=exp_ttl + 1 if router_mac != '' else exp_ttl) + send_packet(self, src_port_id, pkt, 1) + print >> sys.stderr, "dot1p: %d, calling send_packet" % (dot1p) + + # validate queue counters increment by the correct pkt num + time.sleep(8) + port_results, queue_results = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + print >> sys.stderr, queue_results_base + print >> sys.stderr, queue_results + print >> sys.stderr, map(operator.sub, queue_results, queue_results_base) + for i in range(0, QUEUE_NUM): + if i == queue: + assert(queue_results[queue] == queue_results_base[queue] + len(dot1ps)) + else: + assert(queue_results[i] == queue_results_base[i]) + + # confirm that dot1p pkts sent are received + total_recv_cnt = 0 + dot1p_recv_cnt = 0 + while dot1p_recv_cnt < len(dot1ps): + result = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=3) + if isinstance(result, self.dataplane.PollFailure): + self.fail("Expected packet was not received on port %d. Total received: %d.\n%s" % (dst_port_id, total_recv_cnt, result.format())) + recv_pkt = scapy.Ether(result.packet) + total_recv_cnt += 1 + + # verify dot1p priority + dot1p = dot1ps[dot1p_recv_cnt] + try: + if (recv_pkt.payload.prio == dot1p) and (recv_pkt.payload.vlan == vlan_id): + + dot1p_recv_cnt += 1 + print >> sys.stderr, "dot1p: %d, total received: %d" % (dot1p, total_recv_cnt) + + except AttributeError: + print >> sys.stderr, "dot1p: %d, total received: %d, attribute error!" 
% (dot1p, total_recv_cnt) + continue + + finally: + print >> sys.stderr, "END OF TEST" + +# DSCP to pg mapping +class DscpToPgMapping(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + + exp_ip_id = 100 + exp_ttl = 63 + + # According to SONiC configuration all dscps are classified to pg 0 except: + # dscp 3 -> pg 3 + # dscp 4 -> pg 4 + # So for the 64 pkts sent the mapping should be -> 62 pg 0, 1 for pg 3, and 1 for pg 4 + lossy_dscps = range(0, 64) + lossy_dscps.remove(3) + lossy_dscps.remove(4) + pg_dscp_map = { + 3 : [3], + 4 : [4], + 0 : lossy_dscps + } + print >> sys.stderr, pg_dscp_map + + try: + for pg, dscps in pg_dscp_map.items(): + pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + + # send pkts with dscps that map to the same pg + for dscp in dscps: + tos = (dscp << 2) + tos |= 1 + pkt = simple_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=exp_ttl + 1 if router_mac != '' else exp_ttl) + send_packet(self, src_port_id, pkt, 1) + print >> sys.stderr, "dscp: %d, calling send_packet" % (tos >> 2) + + # validate pg counters increment by the correct pkt num + time.sleep(8) + 
pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, pg_cntrs_base + print >> sys.stderr, pg_cntrs + print >> sys.stderr, map(operator.sub, pg_cntrs, pg_cntrs_base) + for i in range(0, PG_NUM): + if i == pg: + assert(pg_cntrs[pg] == pg_cntrs_base[pg] + len(dscps)) + else: + assert(pg_cntrs[i] == pg_cntrs_base[i]) + + # confirm that dscp pkts are received + total_recv_cnt = 0 + dscp_recv_cnt = 0 + while dscp_recv_cnt < len(dscps): + result = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=3) + if isinstance(result, self.dataplane.PollFailure): + self.fail("Expected packet was not received on port %d. Total received: %d.\n%s" % (dst_port_id, total_recv_cnt, result.format())) + recv_pkt = scapy.Ether(result.packet) + total_recv_cnt += 1 + + # verify dscp flag + tos = dscps[dscp_recv_cnt] << 2 + tos |= 1 + try: + if (recv_pkt.payload.tos == tos) and (recv_pkt.payload.src == src_port_ip) and (recv_pkt.payload.dst == dst_port_ip) and \ + (recv_pkt.payload.ttl == exp_ttl) and (recv_pkt.payload.id == exp_ip_id): + + dscp_recv_cnt += 1 + print >> sys.stderr, "dscp: %d, total received: %d" % (tos >> 2, total_recv_cnt) + + except AttributeError: + print >> sys.stderr, "dscp: %d, total received: %d, attribute error!" 
% (tos >> 2, total_recv_cnt) + continue + + finally: + print >> sys.stderr, "END OF TEST" + +# DOT1P to pg mapping +class Dot1pToPgMapping(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + vlan_id = int(self.test_params['vlan_id']) + + exp_ip_id = 103 + exp_ttl = 63 + + # According to SONiC configuration dot1ps are classified as follows: + # dot1p 0 -> pg 0 + # dot1p 1 -> pg 0 + # dot1p 2 -> pg 0 + # dot1p 3 -> pg 3 + # dot1p 4 -> pg 4 + # dot1p 5 -> pg 0 + # dot1p 6 -> pg 0 + # dot1p 7 -> pg 0 + pg_dot1p_map = { + 0 : [0, 1, 2, 5, 6, 7], + 3 : [3], + 4 : [4] + } + print >> sys.stderr, pg_dot1p_map + + try: + for pg, dot1ps in pg_dot1p_map.items(): + pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + + # send pkts with dot1ps that map to the same pg + for dot1p in dot1ps: + # ecn marked + tos = 1 + # Note that vlan tag can be stripped by a switch. 
+ # To embrace this situation, we assemble a q-in-q double-tagged packet, + # and write the dot1p info into both vlan tags so that + # when we receive the packet we do not need to make any assumption + # on whether the outer tag is stripped by the switch or not, or + # more importantly, we do not need to care about, as in the single-tagged + # case, whether the immediate payload is the vlan tag or the ip + # header to determine the valid fields for receive validation + # purpose. With a q-in-q packet, we are sure that the next layer of + # header in either switching behavior case is still a vlan tag + pkt = simple_qinq_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + dl_vlan_outer=vlan_id, + dl_vlan_pcp_outer=dot1p, + vlan_vid=vlan_id, + vlan_pcp=dot1p, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=exp_ttl + 1 if router_mac != '' else exp_ttl) + send_packet(self, src_port_id, pkt, 1) + print >> sys.stderr, "dot1p: %d, calling send_packet" % (dot1p) + + # validate pg counters increment by the correct pkt num + time.sleep(8) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, pg_cntrs_base + print >> sys.stderr, pg_cntrs + print >> sys.stderr, map(operator.sub, pg_cntrs, pg_cntrs_base) + for i in range(0, PG_NUM): + if i == pg: + assert(pg_cntrs[pg] == pg_cntrs_base[pg] + len(dot1ps)) + else: + assert(pg_cntrs[i] == pg_cntrs_base[i]) + + # confirm that dot1p pkts sent are received + total_recv_cnt = 0 + dot1p_recv_cnt = 0 + while dot1p_recv_cnt < len(dot1ps): + result = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=3) + if isinstance(result, self.dataplane.PollFailure): + self.fail("Expected packet was not received on port %d. 
Total received: %d.\n%s" % (dst_port_id, total_recv_cnt, result.format())) + recv_pkt = scapy.Ether(result.packet) + total_recv_cnt += 1 + + # verify dot1p priority + dot1p = dot1ps[dot1p_recv_cnt] + try: + if (recv_pkt.payload.prio == dot1p) and (recv_pkt.payload.vlan == vlan_id): + + dot1p_recv_cnt += 1 + print >> sys.stderr, "dot1p: %d, total received: %d" % (dot1p, total_recv_cnt) + + except AttributeError: + print >> sys.stderr, "dot1p: %d, total received: %d, attribute error!" % (dot1p, total_recv_cnt) + continue + + finally: + print >> sys.stderr, "END OF TEST" + +# This test is to measure the Xoff threshold, and buffer limit +class PFCtest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + pg = int(self.test_params['pg']) + 2 # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + max_buffer_size = int(self.test_params['buffer_max_size']) + max_queue_size = int(self.test_params['queue_max_size']) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + pkts_num_trig_ingr_drp = int(self.test_params['pkts_num_trig_ingr_drp']) + + # Prepare TCP packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + 
ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 2 + + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + try: + # send packets short of triggering pfc + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - 1 - margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port no pfc + assert(recv_counters[pg] == recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress 
drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + # send 1 packet to trigger pfc + send_packet(self, src_port_id, pkt, 1 + 2 * margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters_base = recv_counters + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port pfc + assert(recv_counters[pg] > recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + # send packets short of ingress drop + send_packet(self, src_port_id, pkt, pkts_num_trig_ingr_drp - pkts_num_trig_pfc - 1 - 2 * margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters_base = recv_counters + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port pfc + assert(recv_counters[pg] > recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + # send 1 packet to trigger ingress drop + send_packet(self, src_port_id, pkt, 1 + 2 * margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv 
and transmit ports + # queue counters value is not of our interest here + recv_counters_base = recv_counters + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port pfc + assert(recv_counters[pg] > recv_counters_base[pg]) + # recv port ingress drop + assert(recv_counters[INGRESS_DROP] > recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +# This test looks to measure xon threshold (pg_reset_floor) +class PFCXonTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + last_pfc_counter = 0 + recv_port_counters = [] + transmit_port_counters = [] + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + max_buffer_size = int(self.test_params['buffer_max_size']) + pg = int(self.test_params['pg']) + 2 # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = 
int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + asic_type = self.test_params['sonic_asic_type'] + + tos = dscp << 2 + tos |= ecn + ttl = 64 + + # TODO: pass in dst_port_id and _ip as a list + dst_port_2_id = int(self.test_params['dst_port_2_id']) + dst_port_2_ip = self.test_params['dst_port_2_ip'] + dst_port_2_mac = self.dataplane.get_mac(0, dst_port_2_id) + dst_port_3_id = int(self.test_params['dst_port_3_id']) + dst_port_3_ip = self.test_params['dst_port_3_ip'] + dst_port_3_mac = self.dataplane.get_mac(0, dst_port_3_id) + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + pkts_num_dismiss_pfc = int(self.test_params['pkts_num_dismiss_pfc']) + default_packet_length = 64 + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + # The number of packets that will trek into the headroom space; + # We observe in test that if the packets are sent to multiple destination ports, + # the ingress may not trigger PFC sharp at its boundary + margin = 1 + + if asic_type == 'mellanox': + # Stop function of dst xmit ports + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + 
self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + else: + # Pause egress of dut xmit ports + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + + try: + # send packets to dst port 0 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - pkts_num_dismiss_pfc) + # send packets to dst port 1 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_2_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_2_ip, + ip_tos=tos, + ip_ttl=ttl) + send_packet(self, src_port_id, pkt, pkts_num_leak_out + margin + pkts_num_dismiss_pfc - 1) + # send 1 packet to dst port 2 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_3_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_3_ip, + ip_tos=tos, + ip_ttl=ttl) + send_packet(self, src_port_id, pkt, pkts_num_leak_out + 1) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = 
sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + # recv port pfc + assert(recv_counters[pg] > recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) + assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) + + if asic_type == 'mellanox': + # Release dst port 1 + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + else: + # Resume egress of dst port 1 + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters_base = recv_counters + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + 
# recv port pfc + assert(recv_counters[pg] > recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) + assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) + + if asic_type == 'mellanox': + # Release dst port 2 + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + else: + # Resume egress of dst port 2 + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get new base counter values at recv ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + recv_counters_base = recv_counters + + time.sleep(30) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, 
port_list[dst_port_3_id]) + # recv port no pfc + assert(recv_counters[pg] == recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) + assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) + + finally: + if asic_type == 'mellanox': + # Release dst ports + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + else: + # Resume egress of dut xmit ports + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + +class HdrmPoolSizeTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + dscps = self.test_params['dscps'] + ecn = self.test_params['ecn'] + router_mac = self.test_params['router_mac'] + pgs = [pg + 2 for pg in self.test_params['pgs']] # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + src_port_ids = self.test_params['src_port_ids'] + src_port_ips = self.test_params['src_port_ips'] + print >> sys.stderr, src_port_ips + sys.stderr.flush() + + dst_port_id = 
self.test_params['dst_port_id'] + dst_port_ip = self.test_params['dst_port_ip'] + pgs_num = self.test_params['pgs_num'] + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = self.test_params['pkts_num_leak_out'] + pkts_num_trig_pfc = self.test_params['pkts_num_trig_pfc'] + pkts_num_hdrm_full = self.test_params['pkts_num_hdrm_full'] + pkts_num_hdrm_partial = self.test_params['pkts_num_hdrm_partial'] + print >> sys.stderr, ("pkts num: leak_out: %d, trig_pfc: %d, hdrm_full: %d, hdrm_partial: %d" % (pkts_num_leak_out, pkts_num_trig_pfc, pkts_num_hdrm_full, pkts_num_hdrm_partial)) + sys.stderr.flush() + + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in src_port_ids] + margin = 0 + sidx_dscp_pg_tuples = [(sidx, dscp, pgs[pgidx]) for sidx, sid in enumerate(src_port_ids) for pgidx, dscp in enumerate(dscps)] + assert(len(sidx_dscp_pg_tuples) >= pgs_num) + print >> sys.stderr, sidx_dscp_pg_tuples + sys.stderr.flush() + + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in src_port_ids] + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + + # Pause egress of dut xmit port + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], 
attr) + + try: + # send packets to leak out + sidx = 0 + pkt = simple_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_macs[sidx], + ip_src=src_port_ips[sidx], + ip_dst=dst_port_ip, + ip_ttl=64) + send_packet(self, src_port_ids[sidx], pkt, pkts_num_leak_out) + + # send packets to all pgs to fill the service pool + # and trigger PFC on all pgs + for i in range(0, pgs_num): + # Prepare TCP packet data + tos = sidx_dscp_pg_tuples[i][1] << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkts_num_trig_pfc) + + print >> sys.stderr, "Service pool almost filled" + sys.stderr.flush() + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + + for i in range(0, pgs_num): + # Prepare TCP packet data + tos = sidx_dscp_pg_tuples[i][1] << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + pkt_cnt = 0 + + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + while (recv_counters[sidx_dscp_pg_tuples[i][2]] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][sidx_dscp_pg_tuples[i][2]]) and (pkt_cnt < 10): + send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1) + pkt_cnt += 1 + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + + # get a snapshot of counter 
values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + + if pkt_cnt == 10: + sys.exit("Too many pkts needed to trigger pfc: %d" % (pkt_cnt)) + assert(recv_counters[sidx_dscp_pg_tuples[i][2]] > recv_counters_bases[sidx_dscp_pg_tuples[i][0]][sidx_dscp_pg_tuples[i][2]]) + print >> sys.stderr, "%d packets for sid: %d, pg: %d to trigger pfc" % (pkt_cnt, src_port_ids[sidx_dscp_pg_tuples[i][0]], sidx_dscp_pg_tuples[i][2] - 2) + sys.stderr.flush() + + print >> sys.stderr, "PFC triggered" + sys.stderr.flush() + + # send packets to all pgs to fill the headroom pool + for i in range(0, pgs_num): + # Prepare TCP packet data + tos = sidx_dscp_pg_tuples[i][1] << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + + send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkts_num_hdrm_full if i != pgs_num - 1 else pkts_num_hdrm_partial) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + # assert no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][INGRESS_DROP]) + + print >> sys.stderr, "all but the last pg hdrms filled" + sys.stderr.flush() + + # last pg + i = pgs_num - 1 + # send 1 packet on last pg to trigger ingress drop + send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1 + 2 * margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + recv_counters, 
queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + # assert ingress drop + assert(recv_counters[INGRESS_DROP] > recv_counters_bases[sidx_dscp_pg_tuples[i][0]][INGRESS_DROP]) + + # assert no egress drop at the dut xmit port + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + print >> sys.stderr, "pg hdrm filled" + sys.stderr.flush() + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dur xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +# TODO: remove sai_thrift_clear_all_counters and change to use incremental counter values +class DscpEcnSend(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + default_packet_length = 64 + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + num_of_pkts = self.test_params['num_of_pkts'] + limit = self.test_params['limit'] + min_limit = self.test_params['min_limit'] + cell_size = 
self.test_params['cell_size'] + + #STOP PORT FUNCTION + sched_prof_id=sai_thrift_create_scheduler_profile(self.client,STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # Clear Counters + sai_thrift_clear_all_counters(self.client) + + #send packets + try: + tos = dscp << 2 + tos |= ecn + ttl = 64 + for i in range(0, num_of_pkts): + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + send_packet(self, 0, pkt) + + leaking_pkt_number = 0 + for (rcv_port_number, pkt_str, pkt_time) in self.dataplane.packets(0, 1): + leaking_pkt_number += 1 + print "leaking packet %d" % leaking_pkt_number + + # Read Counters + print "DST port counters: " + port_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + print port_counters + print queue_counters + + # Clear Counters + sai_thrift_clear_all_counters(self.client) + + # Set receiving socket buffers to some big value + for p in self.dataplane.ports.values(): + p.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 41943040) + + # RELEASE PORT + sched_prof_id=sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + + # if (ecn == 1) - capture and parse all incoming packets + marked_cnt = 0 + not_marked_cnt = 0 + if (ecn == 1): + print "" + print "ECN capable packets generated, releasing dst_port and analyzing traffic -" + + cnt = 0 + pkts = [] + for i in xrange(num_of_pkts): + (rcv_device, rcv_port, rcv_pkt, pkt_time) = 
dp_poll(self, device_number=0, port_number=dst_port_id, timeout=0.2) + if rcv_pkt is not None: + cnt += 1 + pkts.append(rcv_pkt) + else: # Received less packets then expected + assert (cnt == num_of_pkts) + print " Received packets: " + str(cnt) + + for pkt_to_inspect in pkts: + pkt_str = hex_dump_buffer(pkt_to_inspect) + + # Count marked and not marked amount of packets + if ( (int(pkt_str[ECN_INDEX_IN_HEADER]) & 0x03) == 1 ): + not_marked_cnt += 1 + elif ( (int(pkt_str[ECN_INDEX_IN_HEADER]) & 0x03) == 3 ): + assert (not_marked_cnt == 0) + marked_cnt += 1 + + print " ECN non-marked pkts: " + str(not_marked_cnt) + print " ECN marked pkts: " + str(marked_cnt) + print "" + + time.sleep(5) + # Read Counters + print "DST port counters: " + port_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + print port_counters + print queue_counters + if (ecn == 0): + transmitted_data = port_counters[TRANSMITTED_PKTS] * 2 * cell_size #num_of_pkts*pkt_size_in_cells*cell_size + assert (port_counters[TRANSMITTED_OCTETS] <= limit * 1.05) + assert (transmitted_data >= min_limit) + assert (marked_cnt == 0) + elif (ecn == 1): + non_marked_data = not_marked_cnt * 2 * cell_size + assert (non_marked_data <= limit*1.05) + assert (non_marked_data >= limit*0.95) + assert (marked_cnt == (num_of_pkts - not_marked_cnt)) + assert (port_counters[EGRESS_DROP] == 0) + assert (port_counters[INGRESS_DROP] == 0) + + finally: + # RELEASE PORT + sched_prof_id=sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + print "END OF TEST" + +class WRRtest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + ecn = int(self.test_params['ecn']) + router_mac = 
self.test_params['router_mac'] + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + asic_type = self.test_params['sonic_asic_type'] + default_packet_length = 1500 + exp_ip_id = 110 + queue_0_num_of_pkts = int(self.test_params['q0_num_of_pkts']) + queue_1_num_of_pkts = int(self.test_params['q1_num_of_pkts']) + queue_2_num_of_pkts = int(self.test_params['q2_num_of_pkts']) + queue_3_num_of_pkts = int(self.test_params['q3_num_of_pkts']) + queue_4_num_of_pkts = int(self.test_params['q4_num_of_pkts']) + queue_5_num_of_pkts = int(self.test_params['q5_num_of_pkts']) + queue_6_num_of_pkts = int(self.test_params['q6_num_of_pkts']) + limit = int(self.test_params['limit']) + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + + if asic_type == 'mellanox': + # Stop port function + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # Send packets to leak out + pkt = simple_tcp_packet(pktlen=64, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, 
+ ip_dst=dst_port_ip, + ip_ttl=64) + send_packet(self, src_port_id, pkt, pkts_num_leak_out) + + # Get a snapshot of counter values + port_counters_base, queue_counters_base = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + + # Send packets to each queue based on dscp field + dscp = 3 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_3_num_of_pkts) + + dscp = 4 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_4_num_of_pkts) + + dscp = 8 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_0_num_of_pkts) + + dscp = 0 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_1_num_of_pkts) + + dscp = 5 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_2_num_of_pkts) + + dscp = 46 + tos = dscp << 2 + tos |= 
ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_5_num_of_pkts) + + dscp = 48 + tos = dscp << 2 + tos |= ecn + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_id=exp_ip_id, + ip_ttl=64) + send_packet(self, src_port_id, pkt, queue_6_num_of_pkts) + + # Set receiving socket buffers to some big value + for p in self.dataplane.ports.values(): + p.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 41943040) + + # Release port + if asic_type == 'mellanox': + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + cnt = 0 + pkts = [] + recv_pkt = scapy.Ether() + + while recv_pkt: + received = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=2) + if isinstance(received, self.dataplane.PollFailure): + recv_pkt = None + break + recv_pkt = scapy.Ether(received.packet) + + try: + if recv_pkt.payload.src == src_port_ip and recv_pkt.payload.dst == dst_port_ip and recv_pkt.payload.id == exp_ip_id: + cnt += 1 + pkts.append(recv_pkt) + except AttributeError: + continue + + queue_pkt_counters = [0] * 49 + queue_num_of_pkts = [0] * 49 + queue_num_of_pkts[8] = 
queue_0_num_of_pkts + queue_num_of_pkts[0] = queue_1_num_of_pkts + queue_num_of_pkts[5] = queue_2_num_of_pkts + queue_num_of_pkts[3] = queue_3_num_of_pkts + queue_num_of_pkts[4] = queue_4_num_of_pkts + queue_num_of_pkts[46] = queue_5_num_of_pkts + queue_num_of_pkts[48] = queue_6_num_of_pkts + total_pkts = 0 + + for pkt_to_inspect in pkts: + dscp_of_pkt = pkt_to_inspect.payload.tos >> 2 + total_pkts += 1 + + # Count packet ordering + + queue_pkt_counters[dscp_of_pkt] += 1 + if queue_pkt_counters[dscp_of_pkt] == queue_num_of_pkts[dscp_of_pkt]: + assert((queue_0_num_of_pkts + queue_1_num_of_pkts + queue_2_num_of_pkts + queue_3_num_of_pkts + queue_4_num_of_pkts + queue_5_num_of_pkts + queue_6_num_of_pkts) - total_pkts < limit) + + print >> sys.stderr, queue_pkt_counters + + # Read counters + print "DST port counters: " + port_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + print >> sys.stderr, map(operator.sub, queue_counters, queue_counters_base) + + # All packets sent should be received intact + assert(queue_0_num_of_pkts + queue_1_num_of_pkts + queue_2_num_of_pkts + queue_3_num_of_pkts + queue_4_num_of_pkts + queue_5_num_of_pkts + queue_6_num_of_pkts == total_pkts) + +class LossyQueueTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + pg = int(self.test_params['pg']) + 2 # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + router_mac = self.test_params['router_mac'] + max_buffer_size = int(self.test_params['buffer_max_size']) + headroom_size = int(self.test_params['headroom_size']) + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + dst_port_2_id = int(self.test_params['dst_port_2_id']) + dst_port_2_ip = self.test_params['dst_port_2_ip'] + 
dst_port_2_mac = self.dataplane.get_mac(0, dst_port_2_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + asic_type = self.test_params['sonic_asic_type'] + + # prepare tcp packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_trig_egr_drp = int(self.test_params['pkts_num_trig_egr_drp']) + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 2 + + if asic_type == 'mellanox': + # Stop port function + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + try: 
+ # send packets short of triggering egress drop + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_egr_drp - 1 - margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port no pfc + assert(recv_counters[pg] == recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port no egress drop + assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) + + # send 1 packet to trigger egress drop + send_packet(self, src_port_id, pkt, 1 + 2 * margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port no pfc + assert(recv_counters[pg] == recv_counters_base[pg]) + # recv port no ingress drop + assert(recv_counters[INGRESS_DROP] == recv_counters_base[INGRESS_DROP]) + # xmit port egress drop + assert(xmit_counters[EGRESS_DROP] > xmit_counters_base[EGRESS_DROP]) + + finally: + if asic_type == 'mellanox': + # Release ports + sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], 
attr) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +# pg shared pool applied to both lossy and lossless traffic +class PGSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + pg = int(self.test_params['pg']) + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_fill_min = int(self.test_params['pkts_num_fill_min']) + pkts_num_fill_shared = int(self.test_params['pkts_num_fill_shared']) + cell_size = int(self.test_params['cell_size']) + + # Prepare TCP packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 2 + + if asic_type == 'mellanox': + # Close DST port + 
sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # send packets + try: + # send packets to fill pg min but not trek into shared pool + # so if pg min is zero, it directly treks into shared pool by 1 + # this is the case for lossy traffic + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, pg_shared_wm_res[pg]) + if pkts_num_fill_min: + assert(pg_shared_wm_res[pg] == 0) + else: + # on t1-lag, we found vm will keep sending control + # packets, this will cause the watermark to be 2 * 208 bytes + # as all lossy packets are now mapped to single pg 0 + # so we remove the strict equity check, and use upper bound + # check instead + assert(1 * cell_size <= pg_shared_wm_res[pg]) + assert(pg_shared_wm_res[pg] <= margin * cell_size) + + # send packet batch of fixed packet numbers to fill pg shared + # first round sends only 1 packet + expected_wm = 0 + total_shared = pkts_num_fill_shared - pkts_num_fill_min + pkts_inc = total_shared >> 2 + pkts_num = 1 + margin + while (expected_wm < total_shared): + expected_wm += pkts_num + if (expected_wm > total_shared): + pkts_num -= (expected_wm - total_shared) + expected_wm = total_shared + print >> 
sys.stderr, "pkts num to send: %d, total pkts: %d, pg shared: %d" % (pkts_num, expected_wm, total_shared) + + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % (expected_wm * cell_size, pg_shared_wm_res[pg], (expected_wm + margin) * cell_size) + assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) + assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) + + pkts_num = pkts_inc + + # overflow the shared pool + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), pg_shared_wm_res[pg]) + assert(expected_wm == total_shared) + assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) + assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +# pg headroom is a notion for lossless traffic only +class PGHeadroomWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + 
dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + pg = int(self.test_params['pg']) + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + pkts_num_trig_ingr_drp = int(self.test_params['pkts_num_trig_ingr_drp']) + cell_size = int(self.test_params['cell_size']) + + # Prepare TCP packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 0 + + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + 
self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # send packets + try: + # send packets to trigger pfc but not trek into headroom + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + assert(pg_headroom_wm_res[pg] == 0) + + # send packet batch of fixed packet numbers to fill pg headroom + # first round sends only 1 packet + expected_wm = 0 + total_hdrm = pkts_num_trig_ingr_drp - pkts_num_trig_pfc - 1 + pkts_inc = total_hdrm >> 2 + pkts_num = 1 + margin + while (expected_wm < total_hdrm): + expected_wm += pkts_num + if (expected_wm > total_hdrm): + pkts_num -= (expected_wm - total_hdrm) + expected_wm = total_hdrm + print >> sys.stderr, "pkts num to send: %d, total pkts: %d, pg headroom: %d" % (pkts_num, expected_wm, total_hdrm) + + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % ((expected_wm - margin) * cell_size, pg_headroom_wm_res[pg], (expected_wm * cell_size)) + assert(pg_headroom_wm_res[pg] <= expected_wm * cell_size) + assert((expected_wm - margin) * cell_size <= pg_headroom_wm_res[pg]) + + pkts_num = pkts_inc + + # overflow the headroom + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + print >> sys.stderr, "exceeded pkts num sent: %d, actual value: %d, expected watermark: %d" % (pkts_num, pg_headroom_wm_res[pg], (expected_wm * cell_size)) + assert(expected_wm == total_hdrm) + assert(pg_headroom_wm_res[pg] == expected_wm * cell_size) + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = 
sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +class QSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + queue = int(self.test_params['queue']) + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_fill_min = int(self.test_params['pkts_num_fill_min']) + pkts_num_trig_drp = int(self.test_params['pkts_num_trig_drp']) + cell_size = int(self.test_params['cell_size']) + + # Prepare TCP packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause 
the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 0 + + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # send packets + try: + # send packets to fill queue min but not trek into shared pool + # so if queue min is zero, it will directly trek into shared pool by 1 + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) + print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, q_wm_res[queue]) + assert(q_wm_res[queue] == (0 if pkts_num_fill_min else (1 * cell_size))) + + # send packet batch of fixed packet numbers to fill queue shared + # first round sends only 1 packet + expected_wm = 0 + total_shared = pkts_num_trig_drp - pkts_num_fill_min - 1 + pkts_inc = total_shared >> 2 + pkts_num = 1 + margin + while (expected_wm < total_shared): + expected_wm += pkts_num + if (expected_wm > total_shared): + pkts_num -= (expected_wm - total_shared) + expected_wm = total_shared + print >> sys.stderr, "pkts num to send: %d, total pkts: %d, queue shared: %d" % (pkts_num, expected_wm, total_shared) + + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = 
sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % ((expected_wm - margin) * cell_size, q_wm_res[queue], (expected_wm * cell_size)) + assert(q_wm_res[queue] <= expected_wm * cell_size) + assert((expected_wm - margin) * cell_size <= q_wm_res[queue]) + + pkts_num = pkts_inc + + # overflow the shared pool + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) + print >> sys.stderr, "exceeded pkts num sent: %d, actual value: %d, expected watermark: %d" % (pkts_num, q_wm_res[queue], (expected_wm * cell_size)) + assert(expected_wm == total_shared) + assert(q_wm_res[queue] == expected_wm * cell_size) + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + +# TODO: buffer pool roid should be obtained via rpc calls +# based on the pg or queue index +# rather than fed in as test parameters due to the lack in SAI implement +class BufferPoolWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + print >> sys.stderr, "router_mac: %s" % (router_mac) + pg = self.test_params['pg'] 
+ queue = self.test_params['queue'] + print >> sys.stderr, "pg: %s, queue: %s, buffer pool type: %s" % (pg, queue, 'egress' if not pg else 'ingress') + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_fill_min = int(self.test_params['pkts_num_fill_min']) + pkts_num_fill_shared = int(self.test_params['pkts_num_fill_shared']) + cell_size = int(self.test_params['cell_size']) + + print >> sys.stderr, "buf_pool_roid: %s" % (self.test_params['buf_pool_roid']) + buf_pool_roid=int(self.test_params['buf_pool_roid'], 0) + print >> sys.stderr, "buf_pool_roid: 0x%lx" % (buf_pool_roid) + + # Prepare TCP packet data + tos = dscp << 2 + tos |= ecn + ttl = 64 + default_packet_length = 64 + pkt = simple_tcp_packet(pktlen=default_packet_length, + eth_dst=router_mac if router_mac != '' else dst_port_mac, + eth_src=src_port_mac, + ip_src=src_port_ip, + ip_dst=dst_port_ip, + ip_tos=tos, + ip_ttl=ttl) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or the leak out is simply less than expected as we have occasionally observed + margin = 2 + + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr 
= sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + + # send packets + try: + # send packets to fill min but not trek into shared pool + # so if min is zero, it directly treks into shared pool by 1 + # this is the case for lossy traffic at ingress and lossless traffic at egress (on td2) + # Because lossy and lossless traffic use the same pool at ingress, even if + # lossless traffic has pg min not equal to zero, we still need to consider + # the impact caused by lossy traffic + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + time.sleep(8) + buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, buffer_pool_wm) + if pkts_num_fill_min: + assert(buffer_pool_wm <= margin * cell_size) + else: + # on t1-lag, we found vm will keep sending control + # packets, this will cause the watermark to be 2 * 208 bytes + # as all lossy packets are now mapped to single pg 0 + # so we remove the strict equity check, and use upper bound + # check instead + assert(1 * cell_size <= buffer_pool_wm) + assert(buffer_pool_wm <= margin * cell_size) + + # send packet batch of fixed packet numbers to fill shared + # first round sends only 1 packet + expected_wm = 0 + total_shared = pkts_num_fill_shared - pkts_num_fill_min + pkts_inc = total_shared >> 2 + pkts_num = 1 + margin + while (expected_wm < total_shared): + expected_wm += pkts_num + if (expected_wm > total_shared): + pkts_num -= (expected_wm - total_shared) + expected_wm = total_shared + print >> sys.stderr, "pkts num to send: %d, total pkts: %d, shared: %d" % (pkts_num, expected_wm, total_shared) + + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + buffer_pool_wm = 
sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % (expected_wm * cell_size, buffer_pool_wm, (expected_wm + margin) * cell_size) + assert(buffer_pool_wm <= (expected_wm + margin) * cell_size) + assert(expected_wm * cell_size <= buffer_pool_wm) + + pkts_num = pkts_inc + + # overflow the shared pool + send_packet(self, src_port_id, pkt, pkts_num) + time.sleep(8) + buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), buffer_pool_wm) + assert(expected_wm == total_shared) + assert(expected_wm * cell_size <= buffer_pool_wm) + assert(buffer_pool_wm <= (expected_wm + margin) * cell_size) + + finally: + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) diff --git a/ansible/roles/test/files/saitests/switch.py b/ansible/roles/test/files/saitests/switch.py index a5fb00a8989..e81cb616795 100644 --- a/ansible/roles/test/files/saitests/switch.py +++ b/ansible/roles/test/files/saitests/switch.py @@ -70,6 +70,8 @@ def switch_init(client): else: print "unknown switch attribute" + # TOFIX in brcm sai: This causes the following error on td2 (a7050-qx-32s) + # ERR syncd: brcm_sai_set_switch_attribute:842 updating switch mac addr failed with error -2. 
attr_value = sai_thrift_attribute_value_t(mac='00:77:66:55:44:33') attr = sai_thrift_attribute_t(id=SAI_SWITCH_ATTR_SRC_MAC_ADDRESS, value=attr_value) client.sai_thrift_set_switch_attribute(attr) @@ -85,7 +87,7 @@ def switch_init(client): for interface,front in interface_to_front_mapping.iteritems(): sai_port_id = client.sai_thrift_get_port_id_by_front_port(front); port_list[int(interface)]=sai_port_id - + switch_inited = 1 @@ -544,7 +546,7 @@ def sai_thrift_create_scheduler_profile(client, max_rate, algorithm=0): value=attribute_value) scheduler_attr_list.append(attribute) attribute_value = sai_thrift_attribute_value_t(s32=algorithm) - attribute = sai_thrift_attribute_t(id=SAI_SCHEDULER_ATTR_SCHEDULING_ALGORITHM , + attribute = sai_thrift_attribute_t(id=SAI_SCHEDULER_ATTR_SCHEDULING_TYPE, value=attribute_value) scheduler_attr_list.append(attribute) scheduler_profile_id = client.sai_thrift_create_scheduler_profile(scheduler_attr_list) @@ -618,7 +620,7 @@ def sai_thrift_clear_all_counters(client): def sai_thrift_read_port_counters(client,port): port_cnt_ids=[] port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_DISCARDS) - port_cnt_ids.append(SAI_PORT_STAT_ETHER_STATS_DROP_EVENTS) + port_cnt_ids.append(SAI_PORT_STAT_IF_IN_DISCARDS) port_cnt_ids.append(SAI_PORT_STAT_PFC_0_TX_PKTS) port_cnt_ids.append(SAI_PORT_STAT_PFC_1_TX_PKTS) port_cnt_ids.append(SAI_PORT_STAT_PFC_2_TX_PKTS) @@ -631,6 +633,7 @@ def sai_thrift_read_port_counters(client,port): port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_UCAST_PKTS) counters_results=[] counters_results = client.sai_thrift_get_port_stats(port,port_cnt_ids,len(port_cnt_ids)) + queue_list=[] port_attr_list = client.sai_thrift_get_port_attribute(port) attr_list = port_attr_list.attr_list @@ -650,6 +653,76 @@ def sai_thrift_read_port_counters(client,port): queue1+=1 return (counters_results, queue_counters_results) +def sai_thrift_read_port_watermarks(client,port): + q_wm_ids=[] + q_wm_ids.append(SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES) + + pg_wm_ids=[] + 
pg_wm_ids.append(SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES) + pg_wm_ids.append(SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES) + + queue_list=[] + pg_list=[] + port_attr_list = client.sai_thrift_get_port_attribute(port) + attr_list = port_attr_list.attr_list + for attribute in attr_list: + if attribute.id == SAI_PORT_ATTR_QOS_QUEUE_LIST: + for queue_id in attribute.value.objlist.object_id_list: + queue_list.append(queue_id) + elif attribute.id == SAI_PORT_ATTR_INGRESS_PRIORITY_GROUP_LIST: + for pg_id in attribute.value.objlist.object_id_list: + pg_list.append(pg_id) + + thrift_results=[] + queue_res=[] + pg_shared_res=[] + pg_headroom_res=[] + + # Only use the first 8 queues (unicast) - multicast queues are not used + for queue in queue_list[:8]: + thrift_results=client.sai_thrift_get_queue_stats(queue,q_wm_ids,len(q_wm_ids)) + queue_res.append(thrift_results[0]) + + for pg in pg_list: + thrift_results=client.sai_thrift_get_pg_stats(pg,pg_wm_ids,len(pg_wm_ids)) + pg_headroom_res.append(thrift_results[0]) + pg_shared_res.append(thrift_results[1]) + + return (queue_res, pg_shared_res, pg_headroom_res) + +def sai_thrift_read_pg_counters(client, port_id): + pg_cntr_ids=[ + SAI_INGRESS_PRIORITY_GROUP_STAT_PACKETS + ] + + # fetch pg ids under port id + pg_ids = [] + port_attrs = client.sai_thrift_get_port_attribute(port_id) + attrs = port_attrs.attr_list + for attr in attrs: + if attr.id == SAI_PORT_ATTR_INGRESS_PRIORITY_GROUP_LIST: + for pg_id in attr.value.objlist.object_id_list: + pg_ids.append(pg_id) + + # get counter values of counter ids of interest under each pg + pg_cntrs=[] + for pg_id in pg_ids: + cntr_vals = client.sai_thrift_get_pg_stats(pg_id, pg_cntr_ids, len(pg_cntr_ids)) + pg_cntrs.append(cntr_vals[0]) + + return pg_cntrs + +def sai_thrift_read_buffer_pool_watermark(client, buffer_pool_id): + buffer_pool_wm_ids = [ + SAI_BUFFER_POOL_STAT_WATERMARK_BYTES + ] + + wm_vals = client.sai_thrift_get_buffer_pool_stats(buffer_pool_id, 
buffer_pool_wm_ids) + if not wm_vals: + print >> sys.stderr, "sai_thrift_read_buffer_pool_watermark returns empty list" + return None + return wm_vals[0] + def sai_thrift_create_vlan_member(client, vlan_id, port_id, tagging_mode): vlan_member_attr_list = [] attribute_value = sai_thrift_attribute_value_t(s32=vlan_id) diff --git a/ansible/roles/test/tasks/qos_get_max_buff_size.yml b/ansible/roles/test/tasks/qos_get_max_buff_size.yml new file mode 100644 index 00000000000..694aac6c300 --- /dev/null +++ b/ansible/roles/test/tasks/qos_get_max_buff_size.yml @@ -0,0 +1,146 @@ +# Get the mmu buffer config parameters of the target port from the DUT +# to generate the buffer profile for testing and/or +# to calculate the max number of packets the mmu can hold in the single-port sending case that +# only the target port is sending packets + +# TODO: May be better suited as an ansible module + +- debug: + msg="Get {{target_port_name}} port {{target_buffer_profile_type}} MAX buffer size" + +- name: Get {{target_buffer_profile_type}} buffer profile table for {{target_port_name}} port + shell: redis-cli -n 4 KEYS "{{target_table}}|{{target_port_name}}|{{target_pg}}" + register: buffer_profile_table + + +- fail: + msg: "Unable to get {{target_buffer_profile_type}} buffer profile table for {{target_port_name}}" + when: buffer_profile_table.stdout == "" + +- name: Get {{target_buffer_profile_type}} buffer profile for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile_table.stdout}}" profile + register: buffer_profile + + +- fail: + msg: "Unable to get {{target_buffer_profile_type}} buffer profile for {{target_port_name}}" + when: buffer_profile.stdout == "" + +- name: Parse buffer profile name + set_fact: + buffer_profile="{{buffer_profile.stdout|replace('[','')|replace(']','')}}" + + +- name: Get {{target_buffer_profile_type}} buffer headroom size for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" size + register: buffer_headroom + 
+- fail: + msg: "Unable to get headroom size for {{target_port_name}}" + when: buffer_headroom.stdout == "" + + +- name: Get {{target_buffer_profile_type}} buffer pool profile for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" pool + register: buffer_pool_id + +- name: Parse {{target_buffer_profile_type}} buffer pool profile name + set_fact: + buffer_pool_id="{{buffer_pool_id.stdout|replace('[','')|replace(']','')}}" + +- name: Get {{target_buffer_profile_type}} buffer alpha ID for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" dynamic_th + register: buffer_alpha_raw + +# static threshold +- block: + - debug: + msg: > + "Unable to get {{target_buffer_profile_type}} alpha for {{target_port_name}}\n" + "{{target_buffer_profile_type}} buffer uses static threshold" + + - name: Get {{target_buffer_profile_type}} buffer alpha ID for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" static_th + register: buffer_static_th + + - fail: + msg: "Unable to get {{target_buffer_profile_type}} static threshold for {{target_port_name}}" + when: buffer_static_th.stdout == "" + + - set_fact: + buffer_max_size: "{{buffer_static_th.stdout|int}}" + when: buffer_alpha_raw.stdout == "" + +# dynamic threshold +- block: + - name: Calculate the {{target_buffer_profile_type}} alpha + set_fact: + buffer_alpha="{{2|pow(buffer_alpha_raw.stdout|int)}}" + + - name: Get {{target_buffer_profile_type}} buffer pool size for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_pool_id}}" size + register: buffer_pool_size + + - fail: + msg: "Unable to get {{target_buffer_profile_type}} buffer pool size for {{target_port_name}}" + when: buffer_pool_size.stdout == "" + + - name: Calculate MAX buffer size for {{target_port_name}} port + set_fact: + buffer_max_size="{{buffer_headroom.stdout|int + ((buffer_alpha|float / (buffer_alpha|float + 1)) * buffer_pool_size.stdout|int)}}" + when: buffer_alpha_raw.stdout != 
"" + + +# ingress lossless specific +- name: Get XON for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" xon + register: buffer_xon + when: buffer_profile != "" and "pg_lossless" in buffer_profile + +- fail: + msg: "Unable to get XON for {{target_port_name}}" + when: "'pg_lossless' in buffer_profile and buffer_xon.stdout == ''" + + +# ingress lossless specific +- name: Get XOFF for {{target_port_name}} port + shell: redis-cli -n 4 HGET "{{buffer_profile}}" xoff + register: buffer_xoff + when: buffer_profile != "" and 'pg_lossless' in buffer_profile + +- fail: + msg: "Unable to get XOFF for {{target_port_name}}" + when: "'pg_lossless' in buffer_profile and buffer_xoff.stdout == ''" + + +# Get buffer pool ROID +# This is perhaps the only useful section in this yaml play +- set_fact: + buffer_pool_name="{{buffer_pool_id|replace('BUFFER_POOL|','')}}" + +- name: Get {{buffer_pool_name}} VOID + shell: redis-cli -n 2 HGET COUNTERS_BUFFER_POOL_NAME_MAP "{{buffer_pool_name}}" + register: buffer_pool_void + +- fail: + msg: "Unable to get VOID for {{buffer_pool_name}}" + when: buffer_pool_void.stdout == "" + +- name: Parse buffer pool VOID + set_fact: + buffer_pool_void="{{buffer_pool_void.stdout}}" + +- name: Get {{buffer_pool_name}} ROID + shell: redis-cli -n 1 HGET VIDTORID "{{buffer_pool_void}}" + register: buffer_pool_roid + +- fail: + msg: "Unable to get ROID for {{buffer_pool_name}}" + when: buffer_pool_roid.stdout == "" + +- name: Parse buffer pool ROID, remove 'oid:' prefix + set_fact: + buffer_pool_roid="{{buffer_pool_roid.stdout|replace('oid:','')}}" + +- debug: + msg="{{buffer_pool_name}} roid {{buffer_pool_roid}}" diff --git a/ansible/roles/test/tasks/qos_get_ports.yml b/ansible/roles/test/tasks/qos_get_ports.yml new file mode 100644 index 00000000000..1da26daa384 --- /dev/null +++ b/ansible/roles/test/tasks/qos_get_ports.yml @@ -0,0 +1,343 @@ +- name: Init variables. 
+ set_fact: + ptf_interfaces: [] + dut_switch_ports: [] + ptf_lag_interfaces: [] + dut_switch_lag_members: [] + testing_ptf_interfaces: [] + +- name: Getting minigraph facts + minigraph_facts: host={{inventory_hostname}} + become: no + +# Index stored in ptf_interfaces list is the index +# to its corresponding connected dut port name in dut_switch_ports list +- name: Get PTF interfaces from map + set_fact: + ptf_interfaces: "{{ptf_interfaces + [item.split('@')[0]]}}" + with_lines: cat {{ptf_portmap}} + when: "'#' not in item" + +- name: Get switch ports from map + set_fact: + dut_switch_ports: "{{dut_switch_ports + [item.split('@')[1]]}}" + with_lines: cat {{ptf_portmap}} + when: "'#' not in item" + +- name: Print switch ports and PTF interfaces + debug: msg="ptf_interfaces={{ptf_interfaces}} dut_switch_ports={{dut_switch_ports}} total_ports={{dut_switch_ports|length}}" + + +- name: Set ptf LAG interfaces + set_fact: + ptf_lag_interfaces: "{{ptf_lag_interfaces + [ (item|replace(\"PortChannel\", \"\")|int / 4)|int ]}}" + with_items: "{{minigraph_portchannels.keys()}}" + +- name: Get switch LAG members + set_fact: + dut_switch_lag_members: "{{dut_switch_lag_members + item['members']}}" + with_items: "{{minigraph_portchannels.values()}}" + +- name: Print LAG members + debug: msg="ptf_lag_interfaces={{ptf_lag_interfaces}} dut_switch_lag_members={{dut_switch_lag_members}}" + + +- name: Init testing port count + set_fact: + testing_ports_count: 0 + testing_ports_id: [] + +- name: Find the not lag ports for testing + set_fact: + testing_ports_id: "{{testing_ports_id + [item]}}" + with_items: "{{ptf_interfaces}}" + when: + - dut_switch_ports[item|int] in minigraph_ports.keys() + - dut_switch_ports[item|int] not in dut_switch_lag_members + - item != '31' # Only for Mellanox testbed. 
The last port is used for up link from DUT switch + +# TODO: make port selection random +- name: Set DST port ID + set_fact: + dst_port_id: "{{testing_ports_id[0]}}" + +- name: Set DST port 2 ID + set_fact: + dst_port_2_id: "{{testing_ports_id[1]}}" + +- name: Set SRC port ID + set_fact: + src_port_id: "{{testing_ports_id[2]}}" + +- name: Set DST port 3 ID + set_fact: + dst_port_3_id: "{{testing_ports_id[3]}}" + + +- name: Get IPs for non-vlan testing ports + testing_port_ip_facts: + testing_ports_id: "{{testing_ports_id}}" + dut_switch_ports: "{{dut_switch_ports}}" + minigraph_bgp: "{{minigraph_bgp}}" + minigraph_neighbors: "{{minigraph_neighbors}}" + connection: local + +- debug: + var: testing_ports_ip + + +- name: Set unique MACs to PTF interfaces + script: roles/test/files/helpers/change_mac.sh + delegate_to: "{{ptf_host}}" + when: minigraph_vlans | length >0 + + +- set_fact: + vlan_members: "{{minigraph_vlans[minigraph_vlans.keys()[0]]['members']}}" + when: minigraph_vlans | length >0 + +- name: Generate IPs in VLAN range + get_ip_in_range: num="{{dut_switch_ports|length}}" prefix="{{minigraph_vlan_interfaces[0]['addr']}}/{{minigraph_vlan_interfaces[0]['prefixlen']}}" exclude_ips="{{minigraph_vlan_interfaces[0]['addr']}}" + become: no + connection: local + failed_when: False + when: minigraph_vlans | length > 0 + +- debug: + var: generated_ips + +- name: Assign IPs to vlan testing ports + set_fact: + testing_ports_ip: "{{testing_ports_ip | combine({item: generated_ips[item|int].split('/')[0]})}}" + when: + testing_ports_ip[item] is not defined and dut_switch_ports[item|int] in vlan_members + with_items: "{{testing_ports_id}}" + +- debug: + var: testing_ports_ip + + +- name: Set DST port 1 IP + set_fact: + dst_port_ip: "{{testing_ports_ip[dst_port_id]}}" + +- name: Set DST port 2 IP + set_fact: + dst_port_2_ip: "{{testing_ports_ip[dst_port_2_id]}}" + +- name: Set SRC port IP + set_fact: + src_port_ip: "{{testing_ports_ip[src_port_id]}}" + +- name: Set DST port 
3 IP + set_fact: + dst_port_3_ip: "{{testing_ports_ip[dst_port_3_id]}}" + + +# Get buffers size +# Ingress lossless +- include: roles/test/tasks/qos_get_max_buff_size.yml + vars: + target_table: 'BUFFER_PG' + target_port_name: "{{dut_switch_ports[src_port_id|int]}}" + target_pg: '3-4' + target_buffer_profile_type: 'ingress lossless' + +- name: Set lossless MAX buffer size + set_fact: + lossless_buffer_max_size: "{{buffer_headroom.stdout|int}}" + +- name: Set lossless ingress buffer pool ROID + set_fact: + lossless_ingr_buf_pool_roid: "{{buffer_pool_roid}}" + + +# Ingress lossy +- include: roles/test/tasks/qos_get_max_buff_size.yml + vars: + target_table: 'BUFFER_PG' + target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_pg: '0' + target_buffer_profile_type: 'ingress lossy' + +- name: Set lossy MAX buffer size + set_fact: + lossy_buffer_max_size: "{{buffer_max_size}}" + +- name: Set lossy headroom size + set_fact: + lossy_headroom_size: "{{buffer_headroom.stdout|int}}" + +- name: Set lossy ingress buffer pool ROID + set_fact: + lossy_ingr_buf_pool_roid: "{{buffer_pool_roid}}" + + +# Egress lossless +- include: roles/test/tasks/qos_get_max_buff_size.yml + vars: + target_table: 'BUFFER_QUEUE' + target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_pg: '3-4' + target_buffer_profile_type: 'egress lossless' + +- name: Set MAX queue size for {{dut_switch_ports[src_port_id|int]}} + set_fact: + lossless_queue_max_size: "{{buffer_max_size}}" + +- name: Set lossless egress buffer pool ROID + set_fact: + lossless_egr_buf_pool_roid: "{{buffer_pool_roid}}" + + +# Egress lossy +- include: roles/test/tasks/qos_get_max_buff_size.yml + vars: + target_table: 'BUFFER_QUEUE' + target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_pg: '0-2' + target_buffer_profile_type: 'egress lossy' + +- name: Set MAX queue size for {{dut_switch_ports[src_port_id|int]}} + set_fact: + lossy_queue_max_size: "{{buffer_max_size}}" + +- name: Set 
lossy egress buffer pool ROID + set_fact: + lossy_egr_buf_pool_roid: "{{buffer_pool_roid}}" + + +# ECN/WRED +- block: + - name: Determine the target queue of the WRED profile + set_fact: + target_q_wred: '3' + + - debug: var=target_q_wred + +- name: Get the WRED profile key for "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 KEYS "QUEUE|{{dut_switch_ports[dst_port_id|int]}}|{{target_q_wred}}" + register: wred_profile_name + +- fail: + msg: "Unable to get the wred profile key for {{dut_switch_ports[dst_port_id|int]}}" + when: wred_profile_name.stdout == "" + +- name: Parse WRED profile key + set_fact: + wred_profile_name="{{wred_profile_name.stdout|replace('[','')|replace(']','')}}" + +- name: Get the WRED profile for "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 HGET "{{wred_profile_name}}" wred_profile + register: wred_profile + +- fail: + msg: "Unable to get the buffer profile for {{dut_switch_ports[dst_port_id|int]}}" + when: wred_profile.stdout == "" + +- name: Parse WRED profile name + set_fact: + wred_profile="{{wred_profile.stdout|replace('[','')|replace(']','')}}" + +- name: Get green_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}} + shell: redis-cli -n 4 HGET "{{wred_profile}}" green_max_threshold + register: green_max_threshold + +- fail: + msg: "Unable to get the green_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}}" + when: green_max_threshold.stdout == "" + +- name: Parse WRED green_max_threshold + set_fact: + green_max_threshold="{{green_max_threshold.stdout|int}}" + +- name: Get yellow_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}} + shell: redis-cli -n 4 HGET "{{wred_profile}}" yellow_max_threshold + register: yellow_max_threshold + +- fail: + msg: "Unable to get the yellow_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}}" + when: yellow_max_threshold.stdout == "" + +- name: Parse WRED 
yellow_max_threshold + set_fact: + yellow_max_threshold="{{yellow_max_threshold.stdout|int}}" + +- name: Get red_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}} + shell: redis-cli -n 4 HGET "{{wred_profile}}" red_max_threshold + register: red_max_threshold + +- fail: + msg: "Unable to get the red_max_threshold for {{dut_switch_ports[dst_port_id|int]}} from {{wred_profile}}" + when: red_max_threshold.stdout == "" + +- name: Parse WRED red_max_threshold + set_fact: + red_max_threshold="{{red_max_threshold.stdout|int}}" + + +# Get watermark polling status +- debug: + msg="Get watermark counter status before the test" + +- name: Get watermark polling status before the test + shell: redis-cli -n 4 HGET "FLEX_COUNTER_TABLE|QUEUE_WATERMARK" FLEX_COUNTER_STATUS + register: watermark_status + +- debug: + msg="Watermark polling status {{watermark_status.stdout}}" + + +# Get scheduler weight +- name: Set target lossy queue to query the lossy scheduler profile + set_fact: + target_q_sched: '0' + +- debug: var=target_q_sched + +- name: Get lossy scheduler profile for "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 HGET "QUEUE|{{dut_switch_ports[dst_port_id|int]}}|{{target_q_sched}}" scheduler + register: lossy_sched_profile + +- fail: + msg: "Unable to get the lossy scheduler profile for {{dut_switch_ports[dst_port_id|int]}}" + when: lossy_sched_profile.stdout == "" + +- name: Process lossy scheduler profile name + set_fact: + lossy_sched_profile="{{lossy_sched_profile.stdout|replace('[','')|replace(']','')}}" + +- name: Get lossy scheduler weight for "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 HGET "{{lossy_sched_profile}}" weight + register: lossy_sched_weight + +- fail: + msg: "Unable to get lossy scheduler weight for {{dut_switch_ports[dst_port_id|int]}}" + when: lossy_sched_weight.stdout == "" + + +- name: Set target lossless queue to query the lossless scheduler profile + set_fact: + target_q_sched: '3' + 
+- debug: var=target_q_sched + +- name: Get lossless scheduler profile "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 HGET "QUEUE|{{dut_switch_ports[dst_port_id|int]}}|{{target_q_sched}}" scheduler + register: lossless_sched_profile + +- fail: + msg: "Unable to get the lossless scheduler profile for {{dut_switch_ports[dst_port_id|int]}}" + when: lossless_sched_profile.stdout == "" + +- name: Process lossless scheduler profile name + set_fact: + lossless_sched_profile="{{lossless_sched_profile.stdout|replace('[','')|replace(']','')}}" + +- name: Get lossless scheduler weight for "{{dut_switch_ports[dst_port_id|int]}}" + shell: redis-cli -n 4 HGET "{{lossless_sched_profile}}" weight + register: lossless_sched_weight + +- fail: + msg: "Unable to get lossless scheduler weight for {{dut_switch_ports[dst_port_id|int]}}" + when: lossless_sched_weight.stdout == "" diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml new file mode 100644 index 00000000000..c0e6fe42a69 --- /dev/null +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -0,0 +1,604 @@ +# To run ecn test the host system where ptf container resides should have +# optimized sysctl parameter "net.core.rmem_max". 
Now it's set to 4194304 +# Also the NICs supposed to have maximum buffer size of RX queue +# See: ethtool -g +# ethtool -G p4p1 rx 8192 + +- include_vars: vars/qos.yml + +- block: + - name: Getting minigraph facts + minigraph_facts: host={{inventory_hostname}} + become: no + + - name: check if the device has configured qos parameters + fail: msg="device doesn't have configured qos parameters" + when: minigraph_hwsku is not defined or qos_params[minigraph_hwsku] is not defined + + - name: set qos parameters for the device + set_fact: qp={{qos_params[minigraph_hwsku]}} + + - name: Ensure LLDP Daemon stopped + become: yes + supervisorctl: state=stopped name={{item}} + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i lldp python + with_items: + - lldpd + - lldp-syncd + + - name: Disable bgpd + become: yes + lineinfile: dest=/etc/quagga/daemons + regexp=^bgpd=.*$ + line='bgpd=no' + notify: + - Restart Quagga Daemon + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i bgp python + + - meta: flush_handlers + + - block: + - name: Deploy script to DUT/syncd + copy: src=roles/test/files/mlnx/packets_aging.py dest=/root/packets_aging.py + + - name: Disable Mellanox packet aging + shell: python /root/packets_aging.py disable + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i syncd python + when: minigraph_hwsku is defined and minigraph_hwsku in mellanox_hwskus + + - name: copy ptf tests + copy: src=roles/test/files/ptftests dest=/root + delegate_to: "{{ptf_host}}" + + - name: copy sai tests + copy: src=roles/test/files/saitests dest=/root + delegate_to: "{{ptf_host}}" + + - name: copy portmap + copy: src={{ptf_portmap}} dest=/root + delegate_to: "{{ptf_host}}" + when: minigraph_hwsku is defined and + (minigraph_hwsku in mellanox_hwskus or minigraph_hwsku == 'Arista-7050-QX-32S' + or minigraph_hwsku == 'Arista-7060CX-32S-C32' or minigraph_hwsku == 'Celestica-DX010-C32' + or 
minigraph_hwsku == 'Arista-7260CX3-D108C8' or minigraph_hwsku == 'Force10-S6100') + + - name: Init PTF base test parameters + set_fact: + ptf_base_params: + - router_mac={% if testbed_type not in ['t0', 't0-64', 't0-116'] %}'{{ansible_Ethernet0['macaddress']}}'{% else %}''{% endif %} + - server='{{ansible_host}}' + - port_map_file='/root/{{ptf_portmap | basename}}' + - sonic_asic_type='{{sonic_asic_type}}' + + - name: Get ports info. + include: roles/test/tasks/qos_get_ports.yml + + # Unpause all paused port + - include: qos_sai_ptf.yml + vars: + test_name: release all paused ports + test_path: sai_qos_tests.ReleaseAllPorts + test_params: [] + + # Populate arps + - name: Check if DUT has ARP aging issue or not + command: arp -n + become: yes + register: arp_entries + + - debug: + var: arp_entries + + - include: qos_sai_ptf.yml + vars: + test_name: populate arp on all ports + test_path: sai_qos_tests.ARPpopulate + test_params: [] + when: testbed_type in ['t0', 't0-64', 't0-116'] or arp_entries.stdout.find('incomplete') == -1 + + - name: Manually add an ARP entry for dst port + command: ip neigh replace {{dst_port_ip}} lladdr 7c:fe:90:5e:6b:a6 dev {{dut_switch_ports[dst_port_id|int]}} + become: yes + when: testbed_type not in ['t0', 't0-64', 't0-116'] and arp_entries.stdout.find('incomplete') != -1 + + # XOFF limit + - include: qos_sai_ptf.yml + vars: + test_name: xoff limit ptf test dscp = {{qp.xoff_1.dscp}}, ecn = {{qp.xoff_1.ecn}} + test_path: sai_qos_tests.PFCtest + test_params: + - dscp='{{qp.xoff_1.dscp}}' + - ecn='{{qp.xoff_1.ecn}}' + - pg='{{qp.xoff_1.pg}}' + - buffer_max_size='{{lossless_buffer_max_size|int}}' + - queue_max_size='{{lossless_queue_max_size|int}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.xoff_1.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp.xoff_1.pkts_num_trig_pfc}}' + - 
pkts_num_trig_ingr_drp='{{qp.xoff_1.pkts_num_trig_ingr_drp}}' + + - include: qos_sai_ptf.yml + vars: + test_name: xoff limit ptf test dscp = {{qp.xoff_2.dscp}}, ecn = {{qp.xoff_2.ecn}} + test_path: sai_qos_tests.PFCtest + test_params: + - dscp='{{qp.xoff_2.dscp}}' + - ecn='{{qp.xoff_2.ecn}}' + - pg='{{qp.xoff_2.pg}}' + - buffer_max_size='{{lossless_buffer_max_size|int}}' + - queue_max_size='{{lossless_queue_max_size|int}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.xoff_2.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp.xoff_2.pkts_num_trig_pfc}}' + - pkts_num_trig_ingr_drp='{{qp.xoff_2.pkts_num_trig_ingr_drp}}' + + # XON limit + - include: qos_sai_ptf.yml + vars: + test_name: xon limit ptf test dscp = {{qp.xon_1.dscp}}, ecn = {{qp.xon_1.ecn}} + test_path: sai_qos_tests.PFCXonTest + test_params: + - dscp='{{qp.xon_1.dscp}}' + - ecn='{{qp.xon_1.ecn}}' + - pg='{{qp.xon_1.pg}}' + - buffer_max_size='{{lossless_buffer_max_size|int}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - dst_port_2_id='{{dst_port_2_id}}' + - dst_port_2_ip='{{dst_port_2_ip}}' + - dst_port_3_id='{{dst_port_3_id}}' + - dst_port_3_ip='{{dst_port_3_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.xon_1.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp.xon_1.pkts_num_trig_pfc}}' + - pkts_num_dismiss_pfc='{{qp.xon_1.pkts_num_dismiss_pfc}}' + + - include: qos_sai_ptf.yml + vars: + test_name: xon limit ptf test dscp = {{qp.xon_2.dscp}}, ecn = {{qp.xon_2.ecn}} + test_path: sai_qos_tests.PFCXonTest + test_params: + - dscp='{{qp.xon_2.dscp}}' + - ecn='{{qp.xon_2.ecn}}' + - pg='{{qp.xon_2.pg}}' + - buffer_max_size='{{lossless_buffer_max_size|int}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - dst_port_2_id='{{dst_port_2_id}}' + - dst_port_2_ip='{{dst_port_2_ip}}' + - 
dst_port_3_id='{{dst_port_3_id}}' + - dst_port_3_ip='{{dst_port_3_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.xon_2.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp.xon_2.pkts_num_trig_pfc}}' + - pkts_num_dismiss_pfc='{{qp.xon_2.pkts_num_dismiss_pfc}}' + + # Headroom pool size + - include: qos_sai_ptf.yml + vars: + test_name: headroom pool size ptf test ecn = {{qp.hdrm_pool_size.ecn}} + test_path: sai_qos_tests.HdrmPoolSizeTest + test_params: + - dscps={{qp.hdrm_pool_size.dscps}} + - ecn={{qp.hdrm_pool_size.ecn}} + - pgs={{qp.hdrm_pool_size.pgs}} + - src_port_ids={{qp.hdrm_pool_size.src_port_ids}} + - src_port_ips=[{% for pid in qp.hdrm_pool_size.src_port_ids %}{% if not loop.last %}'{{testing_ports_ip[pid|string]}}', {% else %}'{{testing_ports_ip[pid|string]}}'{% endif %}{% endfor %}] + - dst_port_id={{qp.hdrm_pool_size.dst_port_id}} + - dst_port_ip='{{testing_ports_ip[qp.hdrm_pool_size.dst_port_id|string]}}' + - pgs_num={{qp.hdrm_pool_size.pgs_num }} + - pkts_num_leak_out={{qp.hdrm_pool_size.pkts_num_leak_out}} + - pkts_num_trig_pfc={{qp.hdrm_pool_size.pkts_num_trig_pfc}} + - pkts_num_hdrm_full={{qp.hdrm_pool_size.pkts_num_hdrm_full}} + - pkts_num_hdrm_partial={{qp.hdrm_pool_size.pkts_num_hdrm_partial}} + when: minigraph_hwsku is defined and + (minigraph_hwsku == 'Arista-7060CX-32S-C32' or minigraph_hwsku == 'Celestica-DX010-C32' or minigraph_hwsku == 'Arista-7260CX3-D108C8' + or minigraph_hwsku == 'Force10-S6100') + + # Lossy queue + - include: qos_sai_ptf.yml + vars: + test_name: Lossy queue, shared buffer dynamic allocation. 
dscp = {{qp.lossy_queue_1.dscp}}, ecn = {{qp.lossy_queue_1.ecn}} + test_path: sai_qos_tests.LossyQueueTest + test_params: + - dscp='{{qp.lossy_queue_1.dscp}}' + - ecn='{{qp.lossy_queue_1.ecn}}' + - pg='{{qp.lossy_queue_1.pg}}' + - buffer_max_size='{{lossy_buffer_max_size|int}}' + - headroom_size='{{lossy_headroom_size}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - dst_port_2_id='{{dst_port_2_id}}' + - dst_port_2_ip='{{dst_port_2_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.lossy_queue_1.pkts_num_leak_out}}' + - pkts_num_trig_egr_drp='{{qp.lossy_queue_1.pkts_num_trig_egr_drp}}' + + # DSCP to queue mapping + - include: qos_sai_ptf.yml + vars: + test_name: dscp to queue mapping ptf test + test_path: sai_qos_tests.DscpMappingPB + test_params: + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + + # WRR test + - include: qos_sai_ptf.yml + vars: + test_name: DWRR + test_path: sai_qos_tests.WRRtest + test_params: + - ecn='{{qp.wrr.ecn}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - q0_num_of_pkts='{{qp.wrr.q0_num_of_pkts}}' + - q1_num_of_pkts='{{qp.wrr.q1_num_of_pkts}}' + - q2_num_of_pkts='{{qp.wrr.q2_num_of_pkts}}' + - q3_num_of_pkts='{{qp.wrr.q3_num_of_pkts}}' + - q4_num_of_pkts='{{qp.wrr.q4_num_of_pkts}}' + - q5_num_of_pkts='{{qp.wrr.q5_num_of_pkts}}' + - q6_num_of_pkts='{{qp.wrr.q6_num_of_pkts}}' + - limit='{{qp.wrr.limit}}' + - pkts_num_leak_out='{{qp.wrr.pkts_num_leak_out}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # PG shared watermark test + - include: qos_sai_ptf.yml + vars: + 
test_name: PG shared watermark test, lossless traffic + test_path: sai_qos_tests.PGSharedWatermarkTest + test_params: + - dscp='{{qp.wm_pg_shared_lossless.dscp}}' + - ecn='{{qp.wm_pg_shared_lossless.ecn}}' + - pg='{{qp.wm_pg_shared_lossless.pg}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_pg_shared_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_pg_shared_lossless.pkts_num_fill_min}}' + - pkts_num_fill_shared='{{qp.wm_pg_shared_lossless.pkts_num_trig_pfc}}' + - cell_size='{{qp.wm_pg_shared_lossless.cell_size}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # PG shared watermark test + - include: qos_sai_ptf.yml + vars: + test_name: PG shared watermark test, lossy traffic + test_path: sai_qos_tests.PGSharedWatermarkTest + test_params: + - dscp='{{qp.wm_pg_shared_lossy.dscp}}' + - ecn='{{qp.wm_pg_shared_lossy.ecn}}' + - pg='{{qp.wm_pg_shared_lossy.pg}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_pg_shared_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_pg_shared_lossy.pkts_num_fill_min}}' + - pkts_num_fill_shared='{{qp.wm_pg_shared_lossy.pkts_num_trig_egr_drp|int - 1}}' + - cell_size='{{qp.wm_pg_shared_lossy.cell_size}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # PG headroom watermark test + - include: qos_sai_ptf.yml + vars: + test_name: PG headroom watermark test + 
test_path: sai_qos_tests.PGHeadroomWatermarkTest + test_params: + - dscp='{{qp.wm_pg_headroom.dscp}}' + - ecn='{{qp.wm_pg_headroom.ecn}}' + - pg='{{qp.wm_pg_headroom.pg}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_pg_headroom.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp.wm_pg_headroom.pkts_num_trig_pfc}}' + - pkts_num_trig_ingr_drp='{{qp.wm_pg_headroom.pkts_num_trig_ingr_drp}}' + - cell_size='{{qp.wm_pg_headroom.cell_size}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # Queue shared watermark test + - include: qos_sai_ptf.yml + vars: + test_name: Queue shared watermark test, lossless traffic + test_path: sai_qos_tests.QSharedWatermarkTest + test_params: + - dscp='{{qp.wm_q_shared_lossless.dscp}}' + - ecn='{{qp.wm_q_shared_lossless.ecn}}' + - queue='{{qp.wm_q_shared_lossless.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_q_shared_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_q_shared_lossless.pkts_num_fill_min}}' + - pkts_num_trig_drp='{{qp.wm_q_shared_lossless.pkts_num_trig_ingr_drp}}' + - cell_size='{{qp.wm_q_shared_lossless.cell_size}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # Queue shared watermark test + - include: qos_sai_ptf.yml + vars: + test_name: Queue shared watermark test, lossy traffic + test_path: sai_qos_tests.QSharedWatermarkTest + test_params: + - 
dscp='{{qp.wm_q_shared_lossy.dscp}}' + - ecn='{{qp.wm_q_shared_lossy.ecn}}' + - queue='{{qp.wm_q_shared_lossy.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_q_shared_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_q_shared_lossy.pkts_num_fill_min}}' + - pkts_num_trig_drp='{{qp.wm_q_shared_lossy.pkts_num_trig_egr_drp}}' + - cell_size='{{qp.wm_q_shared_lossy.cell_size}}' + - debug: + var: out.stdout_lines + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # buffer pool watermark test (skipped on Arista-7050-QX-32S) + - include: qos_sai_ptf.yml + vars: + test_name: Ingress buffer pool watermark test, lossless traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossless.dscp}}' + - ecn='{{qp.wm_buf_pool_lossless.ecn}}' + - pg='{{qp.wm_buf_pool_lossless.pg}}' + - queue='' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_ingr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_pfc}}' + - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' + - buf_pool_roid='{{lossless_ingr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + - debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + +
# buffer pool watermark test (skipped on Arista-7050-QX-32S) + - include: qos_sai_ptf.yml + vars: + test_name: Egress buffer pool watermark test, lossless traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossless.dscp}}' + - ecn='{{qp.wm_buf_pool_lossless.ecn}}' + - pg='' + - queue='{{qp.wm_buf_pool_lossless.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_egr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_ingr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' + - buf_pool_roid='{{lossless_egr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + - debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # buffer pool watermark test (skipped on Arista-7050-QX-32S) + - include: qos_sai_ptf.yml + vars: + test_name: Ingress buffer pool watermark test, lossy traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossy.dscp}}' + - ecn='{{qp.wm_buf_pool_lossy.ecn}}' + - pg='{{qp.wm_buf_pool_lossy.pg}}' + - queue='' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_ingr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' + -
buf_pool_roid='{{lossy_ingr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + - debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + + # Clear all watermarks before each watermark test + # because of the clear on read polling mode + - name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + + # buffer pool watermark test (skipped on Arista-7050-QX-32S) + - include: qos_sai_ptf.yml + vars: + test_name: Egress buffer pool watermark test, lossy traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossy.dscp}}' + - ecn='{{qp.wm_buf_pool_lossy.ecn}}' + - pg='' + - queue='{{qp.wm_buf_pool_lossy.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_egr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' + - buf_pool_roid='{{lossy_egr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + - debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + + # DSCP to pg mapping + - include: qos_sai_ptf.yml + vars: + test_name: dscp to pg mapping ptf test + test_path: sai_qos_tests.DscpToPgMapping + test_params: + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - debug: + var: out.stdout_lines + + # Change lossy and lossless scheduler weights + - name: Change lossy scheduler weight to {{qp.wrr_chg.lossy_weight}} + command: redis-cli -n 4 HSET "{{lossy_sched_profile}}" weight
{{qp.wrr_chg.lossy_weight}} + + - name: Change lossless scheduler weight to {{qp.wrr_chg.lossless_weight}} + command: redis-cli -n 4 HSET "{{lossless_sched_profile}}" weight {{qp.wrr_chg.lossless_weight}} + + # WRR test + - include: qos_sai_ptf.yml + vars: + test_name: DWRR runtime weight change + test_path: sai_qos_tests.WRRtest + test_params: + - ecn='{{qp.wrr_chg.ecn}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - q0_num_of_pkts='{{qp.wrr_chg.q0_num_of_pkts}}' + - q1_num_of_pkts='{{qp.wrr_chg.q1_num_of_pkts}}' + - q2_num_of_pkts='{{qp.wrr_chg.q2_num_of_pkts}}' + - q3_num_of_pkts='{{qp.wrr_chg.q3_num_of_pkts}}' + - q4_num_of_pkts='{{qp.wrr_chg.q4_num_of_pkts}}' + - q5_num_of_pkts='{{qp.wrr_chg.q5_num_of_pkts}}' + - q6_num_of_pkts='{{qp.wrr_chg.q6_num_of_pkts}}' + - limit='{{qp.wrr_chg.limit}}' + - pkts_num_leak_out='{{qp.wrr_chg.pkts_num_leak_out}}' + - debug: + var: out.stdout_lines + + # Restore lossy and lossless scheduler weights + - name: Restore lossy scheduler weight to {{lossy_sched_weight}} + command: redis-cli -n 4 HSET "{{lossy_sched_profile}}" weight "{{lossy_sched_weight.stdout}}" + + - name: Restore lossless scheduler weight to {{lossless_sched_weight}} + command: redis-cli -n 4 HSET "{{lossless_sched_profile}}" weight "{{lossless_sched_weight.stdout}}" + + always: + - name: Restore LLDP Daemon + become: yes + supervisorctl: state=started name={{item}} + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i lldp python + with_items: + - lldpd + - lldp-syncd + + - name: Enable bgpd + become: yes + lineinfile: dest=/etc/quagga/daemons + regexp=^bgpd=.*$ + line='bgpd=yes' + notify: + - Restart Quagga Daemon + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i bgp python + + - name: Restore original watermark polling status + shell: counterpoll watermark {{watermark_status.stdout}} + when: 
watermark_status.stdout == "enable" or watermark_status.stdout == "disable" + + - name: Restore lossy scheduler weight to {{lossy_sched_weight}} + command: redis-cli -n 4 HSET "{{lossy_sched_profile}}" weight "{{lossy_sched_weight.stdout}}" + + - name: Restore lossless scheduler weight to {{lossless_sched_weight}} + command: redis-cli -n 4 HSET "{{lossless_sched_profile}}" weight "{{lossless_sched_weight.stdout}}" + + - name: Enable Mellanox packet aging + shell: python /root/packets_aging.py enable + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i syncd python + when: minigraph_hwsku is defined and minigraph_hwsku in mellanox_hwskus + + - meta: flush_handlers diff --git a/ansible/roles/test/tasks/qos_sai_ptf.yml b/ansible/roles/test/tasks/qos_sai_ptf.yml new file mode 100644 index 00000000000..2981755ba79 --- /dev/null +++ b/ansible/roles/test/tasks/qos_sai_ptf.yml @@ -0,0 +1,16 @@ +- name: Set parameters for specific test + set_fact: ptf_qos_params="{{ptf_base_params + test_params}}" + +- name: "{{test_name}}" + shell: ptf --test-dir saitests {{test_path}} --platform-dir ptftests --platform remote -t "{{ptf_qos_params|join(';')}}" --disable-ipv6 --disable-vxlan --disable-geneve --disable-erspan --disable-mpls --disable-nvgre {{extra_options | default("")}} 2>&1 + args: + chdir: /root + delegate_to: "{{ptf_host}}" + failed_when: False + register: out + +- debug: var=out.stdout_lines + when: out.rc != 0 + +- fail: msg="Failed test '{{test_name}}'" + when: out.rc != 0 diff --git a/ansible/roles/test/templates/qos_lossy_profile.j2 b/ansible/roles/test/templates/qos_lossy_profile.j2 new file mode 100644 index 00000000000..2b377863015 --- /dev/null +++ b/ansible/roles/test/templates/qos_lossy_profile.j2 @@ -0,0 +1,24 @@ +{ + {% if pfc_generate_buffer_profile == 'True' %} + "BUFFER_PROFILE": { + "pg_lossy_TEST_profile": { + "dynamic_th": "-8", + "pool": "[{{ buffer_pool_id }}]", + {% if buffer_headroom.stdout != '0' %} + "size": "{{ 
buffer_headroom.stdout }}" + {% else %} + "size": "15000" + {% endif %} + } + }, + {% endif %} + "BUFFER_PG": { + "{{ dut_switch_ports[src_port_id|int] }}|0-1": { + {% if pfc_generate_buffer_profile == 'True' %} + "profile": "[BUFFER_PROFILE|pg_lossy_TEST_profile]" + {% else %} + "profile": "[{{ buffer_profile }}]" + {% endif %} + } + } +} diff --git a/ansible/roles/test/templates/qos_pfc_profile.j2 b/ansible/roles/test/templates/qos_pfc_profile.j2 new file mode 100644 index 00000000000..9b0650ea5e3 --- /dev/null +++ b/ansible/roles/test/templates/qos_pfc_profile.j2 @@ -0,0 +1,22 @@ +{ + {% if pfc_generate_buffer_profile == 'True' %} + "BUFFER_PROFILE": { + "pg_lossless_PFC_TEST_profile": { + "xon": "{{ buffer_xon.stdout }}", + "dynamic_th": "-8", + "xoff": "{{ buffer_xoff.stdout }}", + "pool": "[{{ buffer_pool_id }}]", + "size": "{{ buffer_headroom.stdout }}" + } + }, + {% endif %} + "BUFFER_PG": { + "{{ dut_switch_ports[src_port_id|int] }}|3-4": { + {% if pfc_generate_buffer_profile == 'True' %} + "profile": "[BUFFER_PROFILE|pg_lossless_PFC_TEST_profile]" + {% else %} + "profile": "[{{ buffer_profile }}]" + {% endif %} + } + } +} diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index e1f084eee35..73f67669ec1 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -188,6 +188,10 @@ testcases: qos: filename: qos.yml topologies: [ptf32, ptf64] + + qos_sai: + filename: qos_sai.yml + topologies: [ptf32, ptf64, t1, t1-lag, t0, t0-64, t0-116] reboot: filename: reboot.yml diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml new file mode 100644 index 00000000000..201411e9cc3 --- /dev/null +++ b/ansible/vars/qos.yml @@ -0,0 +1,712 @@ +--- + +# TBD for ACS-MSN2700 xon_1, xon_2: +# Once the new fw version with the fix is burned, should change the +# xon_th_pkts to 10687 +# xoff_th_pkts to 0 +# since xon_th and xoff_th are configured to the same value. 
+# The current parameters are according to current fw behavior +# +# ecn: +# Dictate the ECN field in assembling a packet +# 0 - not-ECT; 1 - ECT(1); 2 - ECT(0); 3 - CE +# +# ecn_* profile is for ECN limit test, which is removed +# +# Arista-7260CX3-D108C8: +# xoff_1 for 50G +# xoff_2 for 100G +qos_params: + ACS-MSN2700: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + lossy_queue: + dscp: 8 + ecn: 1 + pg: 1 + wrr: + ecn: 1 + q0_num_of_pkts: 600 + q1_num_of_pkts: 400 + q3_num_of_pkts: 500 + q4_num_of_pkts: 500 + limit: 80 + Mellanox-SN2700: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 11115 + pkts_num_trig_ingr_drp: 11213 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 11115 + pkts_num_trig_ingr_drp: 11213 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 11115 + pkts_num_dismiss_pfc: 10924 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 11115 + pkts_num_dismiss_pfc: 10924 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + lossy_queue: + dscp: 8 + ecn: 1 + pg: 1 + pkts_num_leak_out: 0 + pkts_num_trig_egr_drp: 48547 + wrr: 
+ ecn: 1 + q0_num_of_pkts: 600 + q1_num_of_pkts: 400 + q3_num_of_pkts: 500 + q4_num_of_pkts: 500 + limit: 80 + pkts_num_leak_out: 0 + ACS-MSN2740: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 96 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 96 + lossy_queue: + dscp: 8 + ecn: 1 + pg: 1 + wrr: + ecn: 1 + q0_num_of_pkts: 600 + q1_num_of_pkts: 400 + q3_num_of_pkts: 500 + q4_num_of_pkts: 500 + limit: 80 + Arista-7050-QX-32S: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_dismiss_pfc: 12 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_dismiss_pfc: 12 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 1 + pkts_num_leak_out: 48 + pkts_num_trig_egr_drp: 31322 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 
+ pkts_num_leak_out: 48 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 4898 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 1 + ecn: 1 + pg: 0 + pkts_num_leak_out: 48 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 31322 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 48 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossy: + dscp: 1 + ecn: 1 + queue: 1 + pkts_num_leak_out: 48 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 31322 + cell_size: 208 + Force10-S6100: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 1979 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 1979 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 1 + pkts_num_leak_out: 19 + pkts_num_trig_egr_drp: 9887 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + pkts_num_leak_out: 19 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: 
[25, 26, 27, 40, 41] + dst_port_id: 24 + pgs_num: 10 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 732 + pkts_num_hdrm_full: 520 + pkts_num_hdrm_partial: 361 + Arista-7060CX-32S-C32: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 36 + pkts_num_trig_egr_drp: 9887 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + pkts_num_leak_out: 36 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [17, 18] + dst_port_id: 16 + pgs_num: 4 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1095 + pkts_num_hdrm_full: 1292 + pkts_num_hdrm_partial: 1165 + Celestica-DX010-C32: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 36 + 
pkts_num_trig_pfc: 1458 + pkts_num_dismiss_pfc: 11 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 36 + pkts_num_trig_egr_drp: 9887 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + pkts_num_leak_out: 36 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 36 + lossy_weight: 8 + lossless_weight: 30 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [17, 18] + dst_port_id: 16 + pgs_num: 4 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1095 + pkts_num_hdrm_full: 1292 + pkts_num_hdrm_partial: 1165 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 1458 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 36 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 36 + pkts_num_fill_min: 8 + pkts_num_trig_ingr_drp: 2751 + cell_size: 208 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_leak_out: 36 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + 
pkts_num_leak_out: 36 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + pkts_num_fill_egr_min: 8 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 36 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 9887 + pkts_num_fill_egr_min: 8 + cell_size: 208 + Arista-7260CX3-D108C8: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 22 + pkts_num_trig_pfc: 4386 + pkts_num_trig_ingr_drp: 4657 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 31 + pkts_num_trig_pfc: 4386 + pkts_num_trig_ingr_drp: 4853 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 22 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 22 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue: + dscp: 8 + ecn: 1 + pg: 1 + pkts_num_leak_out: 22 + pkts_num_trig_egr_drp: 10522 + wrr: + ecn: 1 + q0_num_of_pkts: 600 + q1_num_of_pkts: 400 + q3_num_of_pkts: 500 + q4_num_of_pkts: 500 + limit: 80 + pkts_num_leak_out: 22 From 8b0c5823fddc49bf66ad5e2ee85127082f2eebf2 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 15 Oct 2019 23:54:21 +0800 Subject: [PATCH 122/218] [bgp_speaker] Flush secondary IP address assigned to PTF interface in testing (#1131) The BGP speaker testing need to assign two secondary IP addresses to a PTF interface. The cleanup section tries to flush the previously assigned IP addresses. However, it flushes the wrong interface. This fix is flush IP address on the correct PTF interface. 
Signed-off-by: Xin Wang --- ansible/roles/test/tasks/bgp_speaker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/bgp_speaker.yml b/ansible/roles/test/tasks/bgp_speaker.yml index 86cf8581740..248aea38846 100644 --- a/ansible/roles/test/tasks/bgp_speaker.yml +++ b/ansible/roles/test/tasks/bgp_speaker.yml @@ -212,5 +212,5 @@ with_items: "{{vlan_ips}}" - name: Remove Assigned IPs - shell: ip addr flush dev eth{{ '%d' % (minigraph_vlans[minigraph_vlan_interfaces[0]['attachto']]['members'][0] | replace("Ethernet", "") | int / 4)}} + shell: ip addr flush dev eth{{ '%d' % (minigraph_port_indices[minigraph_vlans[minigraph_vlan_interfaces[0]['attachto']]['members'][0]])}} delegate_to: "{{ptf_host}}" From 342269cc4985f7aeed9385a16442aff95fced71e Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Wed, 2 Oct 2019 18:54:59 -0700 Subject: [PATCH 123/218] Dump only the current result and summary files for debugging and troubleshooting purpose (#1137) Signed-off-by: Wenda Ni --- ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml index d3ab2301feb..ab2047def33 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml @@ -8,7 +8,7 @@ test_fetch_dir: test/{{ inventory_hostname }} # Output content of result files to ansible console -- shell: cat {{ test_out_dir }}/* +- shell: cat {{ test_out_dir }}/{{ result_file }} {{ test_out_dir }}/{{ summary_file }} register: out - debug: var=out.stdout_lines From 1a9d00dfae5ce36c6d352e8c5a19e42e7e0b5785 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Thu, 3 Oct 2019 10:29:44 -0700 Subject: [PATCH 124/218] Add the capability to check if the number of exact matches is equal to the target number (#1138) Signed-off-by: Wenda 
Ni --- .../files/tools/loganalyzer/loganalyzer_end.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml index ab2047def33..e1988cf8ae2 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_end.yml @@ -20,8 +20,23 @@ shell: grep "TOTAL EXPECTED MISSING MATCHES" "{{ test_out_dir }}/{{ summary_file }}" | sed -n "s/TOTAL EXPECTED MISSING MATCHES:[[:space:]]*//p" register: expected_missing_matches +- debug: msg={{expected_missing_matches}} + +- name: Check if loganalyzer gets the exact number of expected messages + shell: grep "TOTAL EXPECTED MATCHES" "{{ test_out_dir }}/{{ summary_file }}" | sed -n "s/TOTAL EXPECTED MATCHES:[[:space:]]*//p" + register: expected_matches + when: expected_matches_target is defined and expected_matches_target != "" + +- debug: msg={{expected_matches}} + when: expected_matches_target is defined and expected_matches_target != "" + - set_fact: fail_in_logs: "{{ errors_found.stdout != \"0\" or expected_missing_matches.stdout != \"0\" }}" + when: expected_matches_target is not defined or expected_matches_target == "" + +- set_fact: + fail_in_logs: "{{ errors_found.stdout != \"0\" or expected_missing_matches.stdout != \"0\" or expected_matches.stdout != expected_matches_target|string }}" + when: expected_matches_target is defined and expected_matches_target != "" - set_fact: dump_since: '1 hour ago' From 05986b0b9e4a8742c98ef3b8ae58855608077199 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Fri, 4 Oct 2019 09:46:09 -0700 Subject: [PATCH 125/218] Use lookup('pipe', 'date +%H:%M:%S') in place of ansible_date_time.time (#1136) * Use lookup('pipe', 'date +%H:%M:%S') in place of ansible_date_time.time, which uses cached time for a certain period of time https://github.com/ansible/ansible/issues/22561 Signed-off-by: Wenda Ni * 
Address comment Signed-off-by: Wenda Ni --- .../roles/test/files/tools/loganalyzer/loganalyzer_init.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_init.yml b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_init.yml index a90106869aa..c43c21ea2d4 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_init.yml +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_init.yml @@ -18,8 +18,8 @@ when: expect_file is not defined - set_fact: - testname_unique: "{{ testname }}.{{lookup('pipe','date +%Y-%m-%d-%H:%M:%S')}}" - when: testname_unique is not defined + testname_unique: "{{ testname }}.{{ lookup('pipe', 'date +%Y-%m-%d.%H:%M:%S') }}" + when: testname_unique is not defined or (testname_unique_gen is defined and testname_unique_gen == true) - set_fact: test_out_dir: "{{ out_dir }}/{{ testname_unique }}" From 4a14c38aecb84c04b552d7e4bc3f25825779976f Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Thu, 3 Oct 2019 10:27:10 -0700 Subject: [PATCH 126/218] Allow log analyzer to take a specified start marker (#1135) Signed-off-by: Wenda Ni --- .../test/files/tools/loganalyzer/loganalyzer.py | 16 ++++++++++++---- .../tools/loganalyzer/loganalyzer_analyze.yml | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py index 8875e874b47..91967641f00 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py @@ -79,9 +79,10 @@ def init_sys_logger(self): return logger #--------------------------------------------------------------------- - def __init__(self, run_id, verbose): + def __init__(self, run_id, verbose, start_marker = None): self.run_id = run_id self.verbose = verbose + self.start_marker = start_marker
#--------------------------------------------------------------------- def print_diagnostic_message(self, message): @@ -92,7 +93,10 @@ def print_diagnostic_message(self, message): #--------------------------------------------------------------------- def create_start_marker(self): - return self.start_marker_prefix + "-" + self.run_id + if (self.start_marker is None) or (len(self.start_marker) == 0): + return self.start_marker_prefix + "-" + self.run_id + else: + return self.start_marker #--------------------------------------------------------------------- @@ -559,6 +563,7 @@ def main(argv): action = None run_id = None + start_marker = None log_files_in = "" out_dir = None match_files_in = None @@ -567,7 +572,7 @@ def main(argv): verbose = False try: - opts, args = getopt.getopt(argv, "a:r:l:o:m:i:e:vh", ["action=", "run_id=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"]) + opts, args = getopt.getopt(argv, "a:r:s:l:o:m:i:e:vh", ["action=", "run_id=", "start_marker=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"]) except getopt.GetoptError: print "Invalid option specified" @@ -585,6 +590,9 @@ def main(argv): elif (opt in ("-r", "--run_id")): run_id = arg + elif (opt in ("-s", "--start_marker")): + start_marker = arg + elif (opt in ("-l", "--logs")): log_files_in = arg @@ -607,7 +615,7 @@ def main(argv): usage() sys.exit(err_invalid_input) - analyzer = LogAnalyzer(run_id, verbose) + analyzer = LogAnalyzer(run_id, verbose, start_marker) log_file_list = filter(None, log_files_in.split(tokenizer)) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml index a52293e033f..924ce88ec92 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml @@ -46,7 +46,7 @@ extract_log: directory: 
'/var/log' file_prefix: 'syslog' - start_string: 'start-LogAnalyzer-{{ testname_unique }}' + start_string: "{% if start_marker is defined %}{{ start_marker }}{% else %}start-LogAnalyzer-{{ testname_unique }}{% endif %}" target_filename: "/tmp/syslog" become: yes @@ -55,7 +55,7 @@ shell: sed -i 's/^#//g' /etc/cron.d/logrotate become: yes -- set_fact: cmd="python {{ run_dir }}/loganalyzer.py --action analyze --logs {{ tmp_log_file }} --run_id {{ testname_unique }} --out_dir {{ test_out_dir }} {{ match_file_option }} {{ ignore_file_option }} {{ expect_file_option }} -v" +- set_fact: cmd="python {{ run_dir }}/loganalyzer.py --action analyze --logs {{ tmp_log_file }} --run_id {{ testname_unique }} {% if start_marker is defined %}--start_marker '{{ start_marker }}'{% endif %} --out_dir {{ test_out_dir }} {{ match_file_option }} {{ ignore_file_option }} {{ expect_file_option }} -v" - debug: msg={{cmd}} From ff29dced9e28c4f14fdb54f99d12dc9e250318dd Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Fri, 4 Oct 2019 09:39:18 -0700 Subject: [PATCH 127/218] Fine control PFC storm stop at Arista & Mellanox fanout (#1134) * Fine control PFC storm stop at Arista fanout to allow per-queue per-port granularity Signed-off-by: Wenda Ni * [mlnx] per port per queue storm stop on mellanox fanout Signed-off-by: Stepan Blyschak --- ansible/roles/test/templates/pfc_storm_stop_arista.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 index a483cf255df..36fc50a43de 100644 --- a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 +++ b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 @@ -1,5 +1,5 @@ bash cd /mnt/flash -sudo pkill -f {{pfc_gen_file}} +sudo pkill -f "sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r 
{{ansible_eth0_ipv4_addr}}" exit exit From acd54a6ae5213941710bb714c2e2fd6f9d1b2de4 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Mon, 14 Oct 2019 15:03:23 -0700 Subject: [PATCH 128/218] pfc_gen: add the flexibility to defer storm start and stop at Arista fanout (#1139) * Fine control PFC storm stop at Arista fanout to allow per-queue per-port granularity Signed-off-by: Wenda Ni * Add the flexibility to defer storm start and stop at fanout Signed-off-by: Wenda Ni * Address comment: do not send the command to the background if stop_defer_time is not defined Signed-off-by: Wenda Ni --- ansible/roles/test/templates/pfc_storm_arista.j2 | 4 ++-- ansible/roles/test/templates/pfc_storm_stop_arista.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/test/templates/pfc_storm_arista.j2 b/ansible/roles/test/templates/pfc_storm_arista.j2 index 77c914136ed..f79561c7456 100644 --- a/ansible/roles/test/templates/pfc_storm_arista.j2 +++ b/ansible/roles/test/templates/pfc_storm_arista.j2 @@ -1,9 +1,9 @@ bash cd /mnt/flash {% if (pfc_asym is defined) and (pfc_asym == True) %} -sudo python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} & +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} sudo python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} & {% else %} -sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}} & +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/",
"_")}} -r {{ansible_eth0_ipv4_addr}} & {% endif %} exit exit diff --git a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 index 36fc50a43de..cb68b2723d6 100644 --- a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 +++ b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 @@ -1,5 +1,5 @@ bash cd /mnt/flash -sudo pkill -f "sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" +{% if pfc_storm_stop_defer_time is defined %} sleep {{pfc_storm_stop_defer_time}} &&{% endif %} sudo pkill -f "sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" {% if pfc_storm_stop_defer_time is defined %}&{% endif %} exit exit From cf25a7045d2b622b6590423d051865124c43aa97 Mon Sep 17 00:00:00 2001 From: lguohan Date: Mon, 14 Oct 2019 16:27:24 -0700 Subject: [PATCH 129/218] Merge pull request #1143 from wendani/qos_sai_master1 QoS SAI test update --- .../test/files/saitests/sai_qos_tests.py | 102 +++--- ansible/roles/test/files/saitests/switch.py | 26 ++ ansible/roles/test/tasks/qos_sai.yml | 27 +- ansible/vars/qos.yml | 304 +++++++++++++++++- 4 files changed, 385 insertions(+), 74 deletions(-) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 6e7eb0dbb16..7a5ebc6290b 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -24,7 +24,9 @@ port_list, sai_thrift_read_port_watermarks, sai_thrift_read_pg_counters, - sai_thrift_read_buffer_pool_watermark) + sai_thrift_read_buffer_pool_watermark, + sai_thrift_port_tx_disable, + sai_thrift_port_tx_enable) from switch_sai_thrift.ttypes 
import (sai_thrift_attribute_value_t, sai_thrift_attribute_t) from switch_sai_thrift.sai_headers import (SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, @@ -1516,7 +1518,7 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) - print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % (expected_wm * cell_size, pg_shared_wm_res[pg], (expected_wm + margin) * cell_size) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound (+%d): %d" % (expected_wm * cell_size, pg_shared_wm_res[pg], margin, (expected_wm + margin) * cell_size) assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) @@ -1688,7 +1690,11 @@ def runTest(self): # Add slight tolerance in threshold characterization to consider # the case that cpu puts packets in the egress queue after we pause the egress # or the leak out is simply less than expected as we have occasionally observed - margin = 0 + # + # On TH2 using scheduler-based TX enable, we find the Q min being inflated + # to have 0x10 = 16 cells. 
This effect is captured in lossy traffic queue + # shared test, so the margin here actually means extra capacity margin + margin = 8 if asic_type == 'mellanox': # Close DST port @@ -1706,11 +1712,16 @@ def runTest(self): try: # send packets to fill queue min but not trek into shared pool # so if queue min is zero, it will directly trek into shared pool by 1 + # TH2 uses scheduler-based TX enable, this does not require sending packets + # to leak out send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, q_wm_res[queue]) - assert(q_wm_res[queue] == (0 if pkts_num_fill_min else (1 * cell_size))) + if pkts_num_fill_min: + assert(q_wm_res[queue] == 0) + else: + assert(q_wm_res[queue] <= 1 * cell_size) # send packet batch of fixed packet numbers to fill queue shared # first round sends only 1 packet @@ -1728,9 +1739,9 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) - print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % ((expected_wm - margin) * cell_size, q_wm_res[queue], (expected_wm * cell_size)) + print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % (expected_wm * cell_size, q_wm_res[queue], (expected_wm * cell_size)) assert(q_wm_res[queue] <= expected_wm * cell_size) - assert((expected_wm - margin) * cell_size <= q_wm_res[queue]) + assert(expected_wm * cell_size <= q_wm_res[queue]) pkts_num = pkts_inc @@ -1738,9 +1749,10 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = 
sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) - print >> sys.stderr, "exceeded pkts num sent: %d, actual value: %d, expected watermark: %d" % (pkts_num, q_wm_res[queue], (expected_wm * cell_size)) + print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), q_wm_res[queue]) assert(expected_wm == total_shared) - assert(q_wm_res[queue] == expected_wm * cell_size) + assert(expected_wm * cell_size <= q_wm_res[queue]) + assert(q_wm_res[queue] <= (expected_wm + margin) * cell_size) finally: if asic_type == 'mellanox': @@ -1803,20 +1815,20 @@ def runTest(self): # Add slight tolerance in threshold characterization to consider # the case that cpu puts packets in the egress queue after we pause the egress # or the leak out is simply less than expected as we have occasionally observed - margin = 2 - - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - + upper_bound_margin = 2 + # On TD2, we found the watermark value is always short of the expected + # value by 1 + lower_bound_margin = 1 + # On TH2 using scheduler-based TX enable, we find the Q min being inflated + # to have 0x10 = 16 cells. 
This effect is captured in lossy traffic ingress + # buffer pool test and lossy traffic egress buffer pool test to illusively + # have extra capacity in the buffer pool space + extra_cap_margin = 8 + + # Adjust the methodology to enable TX for each incremental watermark value test + # To this end, send the total # of packets instead of the incremental amount + # to refill the buffer to the exepected level + pkts_num_to_send = 0 # send packets try: # send packets to fill min but not trek into shared pool @@ -1825,27 +1837,31 @@ def runTest(self): # Because lossy and lossless traffic use the same pool at ingress, even if # lossless traffic has pg min not equal to zero, we still need to consider # the impact caused by lossy traffic - send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + # + # TH2 uses scheduler-based TX enable, this does not require sending packets to leak out + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + pkts_num_to_send += (pkts_num_leak_out + pkts_num_fill_min) + send_packet(self, src_port_id, pkt, pkts_num_to_send) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, buffer_pool_wm) if pkts_num_fill_min: - assert(buffer_pool_wm <= margin * cell_size) + assert(buffer_pool_wm <= upper_bound_margin * cell_size) else: # on t1-lag, we found vm will keep sending control # packets, this will cause the watermark to be 2 * 208 bytes # as all lossy packets are now mapped to single pg 0 # so we remove the strict equity check, and use upper bound # check instead - assert(1 * cell_size <= buffer_pool_wm) - assert(buffer_pool_wm <= margin * cell_size) + assert(buffer_pool_wm <= upper_bound_margin * cell_size) # send packet batch of fixed packet numbers to 
fill shared # first round sends only 1 packet expected_wm = 0 total_shared = pkts_num_fill_shared - pkts_num_fill_min pkts_inc = total_shared >> 2 - pkts_num = 1 + margin + pkts_num = 1 + upper_bound_margin while (expected_wm < total_shared): expected_wm += pkts_num if (expected_wm > total_shared): @@ -1853,33 +1869,29 @@ def runTest(self): expected_wm = total_shared print >> sys.stderr, "pkts num to send: %d, total pkts: %d, shared: %d" % (pkts_num, expected_wm, total_shared) - send_packet(self, src_port_id, pkt, pkts_num) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + pkts_num_to_send += pkts_num + send_packet(self, src_port_id, pkt, pkts_num_to_send) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) - print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % (expected_wm * cell_size, buffer_pool_wm, (expected_wm + margin) * cell_size) - assert(buffer_pool_wm <= (expected_wm + margin) * cell_size) - assert(expected_wm * cell_size <= buffer_pool_wm) + print >> sys.stderr, "lower bound (-%d): %d, actual value: %d, upper bound (+%d): %d" % (lower_bound_margin, (expected_wm - lower_bound_margin)* cell_size, buffer_pool_wm, upper_bound_margin, (expected_wm + upper_bound_margin) * cell_size) + assert(buffer_pool_wm <= (expected_wm + upper_bound_margin) * cell_size) + assert((expected_wm - lower_bound_margin)* cell_size <= buffer_pool_wm) pkts_num = pkts_inc # overflow the shared pool - send_packet(self, src_port_id, pkt, pkts_num) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + pkts_num_to_send += pkts_num + send_packet(self, src_port_id, pkt, pkts_num_to_send) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual 
value: %d" % (pkts_num, (expected_wm * cell_size), buffer_pool_wm) assert(expected_wm == total_shared) - assert(expected_wm * cell_size <= buffer_pool_wm) - assert(buffer_pool_wm <= (expected_wm + margin) * cell_size) + assert((expected_wm - lower_bound_margin)* cell_size <= buffer_pool_wm) + assert(buffer_pool_wm <= (expected_wm + extra_cap_margin) * cell_size) finally: - if asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) diff --git a/ansible/roles/test/files/saitests/switch.py b/ansible/roles/test/files/saitests/switch.py index e81cb616795..26b3851743c 100644 --- a/ansible/roles/test/files/saitests/switch.py +++ b/ansible/roles/test/files/saitests/switch.py @@ -617,6 +617,32 @@ def sai_thrift_clear_all_counters(client): for queue in queue_list: client.sai_thrift_clear_queue_stats(queue,cnt_ids,len(cnt_ids)) +def sai_thrift_port_tx_disable(client, asic_type, port_id): + if asic_type == 'mellanox': + # Close DST port + sched_prof_id = sai_thrift_create_scheduler_profile(client, STOP_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + client.sai_thrift_set_port_attribute(port_list[port_id], attr) + else: + # Pause egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=0) + attr = 
sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + client.sai_thrift_set_port_attribute(port_list[port_id], attr) + +def sai_thrift_port_tx_enable(client, asic_type, port_id): + if asic_type == 'mellanox': + # Release port + sched_prof_id = sai_thrift_create_scheduler_profile(client, RELEASE_PORT_MAX_RATE) + attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) + client.sai_thrift_set_port_attribute(port_list[port_id], attr) + else: + # Resume egress of dut xmit port + attr_value = sai_thrift_attribute_value_t(booldata=1) + attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + client.sai_thrift_set_port_attribute(port_list[port_id], attr) + def sai_thrift_read_port_counters(client,port): port_cnt_ids=[] port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_DISCARDS) diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index c0e6fe42a69..52d3b2d8b57 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -66,7 +66,8 @@ when: minigraph_hwsku is defined and (minigraph_hwsku in mellanox_hwskus or minigraph_hwsku == 'Arista-7050-QX-32S' or minigraph_hwsku == 'Arista-7060CX-32S-C32' or minigraph_hwsku == 'Celestica-DX010-C32' - or minigraph_hwsku == 'Arista-7260CX3-D108C8' or minigraph_hwsku == 'Force10-S6100') + or minigraph_hwsku == 'Arista-7260CX3-D108C8' or minigraph_hwsku == 'Force10-S6100' + or minigraph_hwsku == 'Arista-7260CX3-Q64') - name: Init PTF base test parameters set_fact: @@ -285,8 +286,12 @@ - pkts_num_fill_min='{{qp.wm_pg_shared_lossless.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossless.pkts_num_trig_pfc}}' - cell_size='{{qp.wm_pg_shared_lossless.cell_size}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') - debug: var: out.stdout_lines + 
when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -310,8 +315,12 @@ - pkts_num_fill_min='{{qp.wm_pg_shared_lossy.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossy.pkts_num_trig_egr_drp|int - 1}}' - cell_size='{{qp.wm_pg_shared_lossy.cell_size}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') - debug: var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -413,11 +422,11 @@ - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' - buf_pool_roid='{{lossless_ingr_buf_pool_roid}}' when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') + (minigraph_hwsku != 'Arista-7050-QX-32S') - debug: var: out.stdout_lines when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') + (minigraph_hwsku != 'Arista-7050-QX-32S') # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -443,12 +452,8 @@ - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_ingr_drp|int - 1}}' - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' - buf_pool_roid='{{lossless_egr_buf_pool_roid}}' - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') - debug: var: out.stdout_lines - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -475,11 +480,11 @@ - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' - buf_pool_roid='{{lossy_ingr_buf_pool_roid}}' when: minigraph_hwsku is defined and - (minigraph_hwsku != 
'Arista-7050-Qx-32S') + (minigraph_hwsku != 'Arista-7050-QX-32S') - debug: var: out.stdout_lines when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') + (minigraph_hwsku != 'Arista-7050-QX-32S') # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -505,12 +510,8 @@ - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' - buf_pool_roid='{{lossy_egr_buf_pool_roid}}' - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') - debug: var: out.stdout_lines - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-Qx-32S') # DSCP to pg mapping - include: qos_sai_ptf.yml diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index 201411e9cc3..191cdfd09ab 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -274,6 +274,19 @@ qos_params: q6_num_of_pkts: 140 limit: 80 pkts_num_leak_out: 48 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 48 + lossy_weight: 8 + lossless_weight: 30 wm_pg_shared_lossless: dscp: 3 ecn: 1 @@ -314,6 +327,27 @@ qos_params: pkts_num_fill_min: 8 pkts_num_trig_egr_drp: 31322 cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 48 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + pkts_num_fill_egr_min: 0 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 48 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 31322 + pkts_num_fill_egr_min: 8 + cell_size: 208 Force10-S6100: xoff_1: dscp: 3 @@ -643,31 +677,194 @@ qos_params: pkts_num_trig_egr_drp: 9887 pkts_num_fill_egr_min: 8 cell_size: 208 + Arista-7260CX3-Q64: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + 
pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 8 + pkts_num_trig_egr_drp: 10692 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + pkts_num_leak_out: 0 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 0 + lossy_weight: 8 + lossless_weight: 30 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 56, 57, 58, 59, 60, 61, 62] + dst_port_id: 0 + pgs_num: 35 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 937 + pkts_num_hdrm_full: 270 + pkts_num_hdrm_partial: 228 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 4457 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 10692 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + 
pkts_num_trig_ingr_drp: 4728 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 0 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 4728 + cell_size: 208 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_leak_out: 0 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 10692 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 0 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 + pkts_num_fill_egr_min: 16 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 0 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 10692 + pkts_num_fill_egr_min: 16 + cell_size: 208 Arista-7260CX3-D108C8: xoff_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 22 - pkts_num_trig_pfc: 4386 - pkts_num_trig_ingr_drp: 4657 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 31 - pkts_num_trig_pfc: 4386 - pkts_num_trig_ingr_drp: 4853 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 22 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 22 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 ecn_1: dscp: 8 ecn: 0 @@ -696,17 +893,92 @@ qos_params: limit: 182320 min_limit: 0 cell_size: 208 - lossy_queue: + lossy_queue_1: dscp: 8 ecn: 1 pg: 1 - pkts_num_leak_out: 22 - pkts_num_trig_egr_drp: 10522 + pkts_num_leak_out: 0 + pkts_num_trig_egr_drp: 10692 wrr: ecn: 1 - q0_num_of_pkts: 600 - q1_num_of_pkts: 400 - q3_num_of_pkts: 500 - q4_num_of_pkts: 500 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 limit: 80 - pkts_num_leak_out: 22 + pkts_num_leak_out: 0 + hdrm_pool_size: + dscps: 
[3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 56, 57, 58, 59, 60, 61, 62] + dst_port_id: 0 + pgs_num: 35 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 937 + pkts_num_hdrm_full: 270 + pkts_num_hdrm_partial: 228 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 4457 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 10692 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 0 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 0 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 4728 + cell_size: 208 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_leak_out: 0 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 10692 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 0 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4728 + pkts_num_fill_egr_min: 16 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 0 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 10692 + pkts_num_fill_egr_min: 16 + cell_size: 208 From 69975bb6f0ca967e3367804609c041731a80df73 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Tue, 15 Oct 2019 06:36:12 -0700 Subject: [PATCH 130/218] PG headroom change for th2 (#1156) Signed-off-by: Wenda Ni --- ansible/vars/qos.yml | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index 191cdfd09ab..f616176f14f 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -260,7 +260,7 @@ qos_params: lossy_queue_1: dscp: 8 ecn: 1 - pg: 1 + pg: 0 pkts_num_leak_out: 48 pkts_num_trig_egr_drp: 31322 wrr: @@ -408,7 +408,7 @@ 
qos_params: lossy_queue_1: dscp: 8 ecn: 1 - pg: 1 + pg: 0 pkts_num_leak_out: 19 pkts_num_trig_egr_drp: 9887 wrr: @@ -684,14 +684,14 @@ qos_params: pg: 3 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 4978 xoff_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 4978 xon_1: dscp: 3 ecn: 1 @@ -738,7 +738,7 @@ qos_params: dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 8 + pkts_num_leak_out: 0 pkts_num_trig_egr_drp: 10692 wrr: ecn: 1 @@ -768,13 +768,13 @@ qos_params: dscps: [3, 4] ecn: 1 pgs: [3, 4] - src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 56, 57, 58, 59, 60, 61, 62] - dst_port_id: 0 - pgs_num: 35 + src_port_ids: [7, 8, 9, 10, 11, 38, 39, 40, 41, 42] + dst_port_id: 6 + pgs_num: 19 pkts_num_leak_out: 0 - pkts_num_trig_pfc: 937 - pkts_num_hdrm_full: 270 - pkts_num_hdrm_partial: 228 + pkts_num_trig_pfc: 1489 + pkts_num_hdrm_full: 520 + pkts_num_hdrm_partial: 48 wm_pg_shared_lossless: dscp: 3 ecn: 1 @@ -797,7 +797,7 @@ qos_params: pg: 3 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 4978 cell_size: 208 wm_q_shared_lossless: dscp: 3 @@ -805,7 +805,7 @@ qos_params: queue: 3 pkts_num_leak_out: 0 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 4978 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -823,7 +823,7 @@ qos_params: pkts_num_leak_out: 0 pkts_num_fill_ingr_min: 6 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 4978 pkts_num_fill_egr_min: 16 cell_size: 208 wm_buf_pool_lossy: @@ -843,14 +843,14 @@ qos_params: pg: 3 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 5140 xoff_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 5140 xon_1: dscp: 3 ecn: 1 @@ -896,7 +896,7 @@ qos_params: lossy_queue_1: dscp: 
8 ecn: 1 - pg: 1 + pg: 0 pkts_num_leak_out: 0 pkts_num_trig_egr_drp: 10692 wrr: @@ -914,13 +914,13 @@ qos_params: dscps: [3, 4] ecn: 1 pgs: [3, 4] - src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 56, 57, 58, 59, 60, 61, 62] + src_port_ids: [1, 2, 3, 4, 5, 6, 7] dst_port_id: 0 - pgs_num: 35 + pgs_num: 14 pkts_num_leak_out: 0 - pkts_num_trig_pfc: 937 - pkts_num_hdrm_full: 270 - pkts_num_hdrm_partial: 228 + pkts_num_trig_pfc: 1826 + pkts_num_hdrm_full: 682 + pkts_num_hdrm_partial: 542 wm_pg_shared_lossless: dscp: 3 ecn: 1 @@ -943,7 +943,7 @@ qos_params: pg: 3 pkts_num_leak_out: 0 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 5140 cell_size: 208 wm_q_shared_lossless: dscp: 3 @@ -951,7 +951,7 @@ qos_params: queue: 3 pkts_num_leak_out: 0 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 5140 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -969,7 +969,7 @@ qos_params: pkts_num_leak_out: 0 pkts_num_fill_ingr_min: 6 pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4728 + pkts_num_trig_ingr_drp: 5140 pkts_num_fill_egr_min: 16 cell_size: 208 wm_buf_pool_lossy: From 2c64632b2559c443337a882a5402aedd377b9b55 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Wed, 16 Oct 2019 15:01:41 -0700 Subject: [PATCH 131/218] qos sai: refactor tx enable/disable (#1162) Signed-off-by: Wenda Ni --- .../test/files/saitests/sai_qos_tests.py | 166 ++---------------- 1 file changed, 14 insertions(+), 152 deletions(-) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 7a5ebc6290b..07486c04666 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -528,17 +528,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 2 - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, 
STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) try: # send packets short of triggering pfc @@ -605,17 +595,7 @@ def runTest(self): assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) finally: - if asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) # This test looks to measure xon threshold (pg_reset_floor) class PFCXonTest(sai_base_test.ThriftInterfaceDataPlane): @@ -729,17 +709,7 @@ def runTest(self): assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) - if asic_type == 'mellanox': - # Release dst port 1 - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = 
sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - else: - # Resume egress of dst port 1 - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_2_id) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) @@ -759,17 +729,7 @@ def runTest(self): assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) - if asic_type == 'mellanox': - # Release dst port 2 - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) - else: - # Resume egress of dst port 2 - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_3_id) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) @@ -852,17 +812,7 @@ def runTest(self): xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) # Pause egress of dut xmit port - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, 
value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) try: # send packets to leak out @@ -976,17 +926,7 @@ def runTest(self): sys.stderr.flush() finally: - if asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dur xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) # TODO: remove sai_thrift_clear_all_counters and change to use incremental counter values class DscpEcnSend(sai_base_test.ThriftInterfaceDataPlane): @@ -1144,16 +1084,7 @@ def runTest(self): limit = int(self.test_params['limit']) pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) - if asic_type == 'mellanox': - # Stop port function - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, 
value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) # Send packets to leak out pkt = simple_tcp_packet(pktlen=64, @@ -1264,16 +1195,7 @@ def runTest(self): p.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 41943040) # Release port - if asic_type == 'mellanox': - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) cnt = 0 pkts = [] @@ -1470,17 +1392,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 2 - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) # send packets try: @@ -1534,17 +1446,7 @@ def runTest(self): assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) finally: - if 
asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) # pg headroom is a notion for lossless traffic only class PGHeadroomWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): @@ -1588,17 +1490,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 0 - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) # send packets try: @@ -1639,17 +1531,7 @@ def runTest(self): assert(pg_headroom_wm_res[pg] == expected_wm * cell_size) finally: - if asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = 
sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) class QSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): @@ -1696,17 +1578,7 @@ def runTest(self): # shared test, so the margin here actually means extra capacity margin margin = 8 - if asic_type == 'mellanox': - # Close DST port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) # send packets try: @@ -1755,17 +1627,7 @@ def runTest(self): assert(q_wm_res[queue] <= (expected_wm + margin) * cell_size) finally: - if asic_type == 'mellanox': - # Release port - sched_prof_id = sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id],attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = 
sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) # TODO: buffer pool roid should be obtained via rpc calls # based on the pg or queue index From 620d0839ef8a5232f0bb406774a9081978db5dce Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Wed, 16 Oct 2019 16:02:57 -0700 Subject: [PATCH 132/218] qos sai: enable headroom pool test on Arista-7260CX3-Q64 (#1163) * qos sai: enable headroom pool test on Arista-7260CX3-Q64 Signed-off-by: Wenda Ni * Address comment Signed-off-by: Wenda Ni --- ansible/roles/test/tasks/qos_sai.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index 52d3b2d8b57..6c9d307e773 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -64,10 +64,8 @@ copy: src={{ptf_portmap}} dest=/root delegate_to: "{{ptf_host}}" when: minigraph_hwsku is defined and - (minigraph_hwsku in mellanox_hwskus or minigraph_hwsku == 'Arista-7050-QX-32S' - or minigraph_hwsku == 'Arista-7060CX-32S-C32' or minigraph_hwsku == 'Celestica-DX010-C32' - or minigraph_hwsku == 'Arista-7260CX3-D108C8' or minigraph_hwsku == 'Force10-S6100' - or minigraph_hwsku == 'Arista-7260CX3-Q64') + (minigraph_hwsku in mellanox_hwskus or minigraph_hwsku in + ['Arista-7050-QX-32S', 'Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64']) - name: Init PTF base test parameters set_fact: @@ -207,8 +205,7 @@ - pkts_num_hdrm_full={{qp.hdrm_pool_size.pkts_num_hdrm_full}} - pkts_num_hdrm_partial={{qp.hdrm_pool_size.pkts_num_hdrm_partial}} when: minigraph_hwsku is defined and - (minigraph_hwsku == 'Arista-7060CX-32S-C32' or minigraph_hwsku == 'Celestica-DX010-C32' or minigraph_hwsku == 'Arista-7260CX3-D108C8' - or minigraph_hwsku == 
'Force10-S6100') + minigraph_hwsku in ['Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64'] # Lossy queue - include: qos_sai_ptf.yml @@ -287,11 +284,11 @@ - pkts_num_fill_shared='{{qp.wm_pg_shared_lossless.pkts_num_trig_pfc}}' - cell_size='{{qp.wm_pg_shared_lossless.cell_size}}' when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') + (minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8']) - debug: var: out.stdout_lines when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') + (minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8']) # Clear all watermarks before each watermark test # because of the clear on read polling mode @@ -316,11 +313,11 @@ - pkts_num_fill_shared='{{qp.wm_pg_shared_lossy.pkts_num_trig_egr_drp|int - 1}}' - cell_size='{{qp.wm_pg_shared_lossy.cell_size}}' when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') + minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8'] - debug: var: out.stdout_lines when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7260CX3-Q64' and minigraph_hwsku != 'Arista-7260CX3-D108C8') + minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8'] # Clear all watermarks before each watermark test # because of the clear on read polling mode From 74f970f0ebbbf45ff162ee8f9522d987cabd4310 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Mon, 21 Oct 2019 08:03:03 +0800 Subject: [PATCH 133/218] [platform/test_reboot]Only check reboot-cause for power off and watchdog reboot. 
(#1169) The power off and watchdog reboot share the same working flow with cold reboot except that their reboot causes differ and the motivation of them is to verify whether the reboot cause is correct. In this sense, it is unnecessary to do the full check for them. --- tests/platform/test_reboot.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index 01ee03534ae..67c65063fa9 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -35,22 +35,26 @@ reboot_ctrl_dict = { REBOOT_TYPE_POWEROFF: { "timeout": 300, - "cause": "Power Loss" + "cause": "Power Loss", + "test_reboot_cause_only": True }, REBOOT_TYPE_COLD: { "command": "reboot", "timeout": 300, - "cause": "reboot" + "cause": "reboot", + "test_reboot_cause_only": False }, REBOOT_TYPE_FAST: { "command": "fast-reboot", "timeout": 180, - "cause": "fast-reboot" + "cause": "fast-reboot", + "test_reboot_cause_only": False }, REBOOT_TYPE_WARM: { "command": "warm-reboot", "timeout": 180, - "cause": "warm-reboot" + "cause": "warm-reboot", + "test_reboot_cause_only": False } } @@ -123,6 +127,10 @@ def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, r logging.info("Check reboot cause") check_reboot_cause(dut, reboot_cause) + if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: + logging.info("Further checking skipped for {} test which intends to verify reboot-cause only".format(reboot_type)) + return + logging.info("Wait some time for all the transceivers to be detected") assert wait_until(300, 20, check_interface_information, dut, interfaces), \ "Not all transceivers are detected or interfaces are up in 300 seconds" From 4335daf91dbb3dc8d25aef1da44148e8723d059e Mon Sep 17 00:00:00 2001 From: Neetha John Date: Thu, 17 Oct 2019 12:40:40 -0700 Subject: [PATCH 134/218] [warm-reboot] Routing change (add/del) during warm boot automation (#1152) * Automation for routing 
change(add/del) during warm-reboot Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 148 +++++++++++++----- ansible/roles/test/files/ptftests/arista.py | 7 +- ansible/roles/test/files/ptftests/sad_path.py | 146 +++++++++++++++-- ansible/roles/test/tasks/advanced-reboot.yml | 39 ++++- ...preboot_list.yml => validate_sad_list.yml} | 4 + .../roles/test/tasks/ptf_runner_reboot.yml | 30 +++- .../tasks/warm-reboot-multi-sad-inboot.yml | 16 ++ ansible/roles/test/vars/testcases.yml | 7 + 8 files changed, 334 insertions(+), 63 deletions(-) rename ansible/roles/test/tasks/advanced_reboot/{validate_preboot_list.yml => validate_sad_list.yml} (83%) create mode 100644 ansible/roles/test/tasks/warm-reboot-multi-sad-inboot.yml diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index fe93e9e931f..9cd5c982c2e 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -121,7 +121,7 @@ def __init__(self): self.logs_info = {} self.log_lock = threading.RLock() self.vm_handle = None - self.pre_handle = None + self.sad_handle = None self.test_params = testutils.test_params_get() self.check_param('verbose', False, required=False) self.check_param('dut_username', '', required=True) @@ -144,13 +144,23 @@ def __init__(self): self.check_param('dut_stabilize_secs', 30, required=False) self.check_param('preboot_files', None, required = False) self.check_param('preboot_oper', None, required = False) # preboot sad path to inject before warm-reboot + self.check_param('inboot_oper', None, required = False) # sad path to inject during warm-reboot + self.check_param('nexthop_ips', [], required = False) # nexthops for the routes that will be added during warm-reboot self.check_param('allow_vlan_flooding', False, required = False) self.check_param('sniff_time_incr', 60, required = False) if not self.test_params['preboot_oper'] or 
self.test_params['preboot_oper'] == 'None': self.test_params['preboot_oper'] = None + if not self.test_params['inboot_oper'] or self.test_params['inboot_oper'] == 'None': + self.test_params['inboot_oper'] = None - if self.test_params['preboot_oper'] is not None: - self.log_file_name = '/tmp/%s-%s.log' % (self.test_params['reboot_type'], self.test_params['preboot_oper']) + # initialize sad oper + if self.test_params['preboot_oper']: + self.sad_oper = self.test_params['preboot_oper'] + else: + self.sad_oper = self.test_params['inboot_oper'] + + if self.sad_oper: + self.log_file_name = '/tmp/%s-%s.log' % (self.test_params['reboot_type'], self.sad_oper) else: self.log_file_name = '/tmp/%s.log' % self.test_params['reboot_type'] self.log_fp = open(self.log_file_name, 'w') @@ -298,7 +308,7 @@ def generate_arp_responder_conf(self, vlan_host_map): def dump_arp_responder_config(self, dump): # save data for arp_replay process - filename = "/tmp/from_t1.json" if self.preboot_oper is None else "/tmp/from_t1_%s.json" % self.preboot_oper + filename = "/tmp/from_t1.json" if self.sad_oper is None else "/tmp/from_t1_%s.json" % self.sad_oper with open(filename, "w") as fp: json.dump(dump, fp) @@ -361,36 +371,98 @@ def populate_fail_info(self, fails): self.fails[key] = set() self.fails[key] |= fails[key] - def get_preboot_info(self): + def get_sad_info(self): ''' - Prepares the msg string to log when a preboot_oper is defined. - preboot_oper can be represented in the following ways + Prepares the msg string to log when a sad_oper is defined. Sad oper can be a preboot or inboot oper + sad_oper can be represented in the following ways eg. 'preboot_oper' - a single VM will be selected and preboot_oper will be applied to it 'neigh_bgp_down:2' - 2 VMs will be selected and preboot_oper will be applied to the selected 2 VMs 'neigh_lag_member_down:3:1' - this case is used for lag member down operation only. 
This indicates that 3 VMs will be selected and 1 of the lag members in the porchannel will be brought down + 'inboot_oper' - represents a routing change during warm boot (add or del of multiple routes) + 'routing_add:10' - adding 10 routes during warm boot ''' msg = '' - if self.preboot_oper: - msg = 'Preboot oper: %s ' % self.preboot_oper - if ':' in self.preboot_oper: - oper_list = self.preboot_oper.split(':') - msg = 'Preboot oper: %s ' % oper_list[0] # extract the preboot oper_type + if self.sad_oper: + msg = 'Sad oper: %s ' % self.sad_oper + if ':' in self.sad_oper: + oper_list = self.sad_oper.split(':') + msg = 'Sad oper: %s ' % oper_list[0] # extract the sad oper_type if len(oper_list) > 2: - # extract the number of VMs and the number of LAG members. preboot_oper will be of the form oper:no of VMS:no of lag members + # extract the number of VMs and the number of LAG members. sad_oper will be of the form oper:no of VMS:no of lag members msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1]) else: - # extract the number of VMs. preboot_oper will be of the form oper:no of VMS - msg += 'Number of sad path VMs: %s' % oper_list[-1] + # inboot oper + if 'routing' in self.sad_oper: + msg += 'Number of ip addresses: %s' % oper_list[-1] + else: + # extract the number of VMs. 
preboot_oper will be of the form oper:no of VMS + msg += 'Number of sad path VMs: %s' % oper_list[-1] return msg + def init_sad_oper(self): + if self.sad_oper: + self.log("Preboot/Inboot Operations:") + self.sad_handle = sp.SadTest(self.sad_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports) + (self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.sad_handle.setup() + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + + if self.sad_oper: + log_info, fails = self.sad_handle.verify() + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + self.log(" ") + + def do_inboot_oper(self): + ''' + Add or del routes during boot + ''' + if self.sad_oper and 'routing' in self.sad_oper: + self.log("Performing inboot operation") + log_info, fails = self.sad_handle.route_setup() + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + self.log(" ") + + def check_inboot_sad_status(self): + if 'routing_add' in self.sad_oper: + self.log('Verify if new routes added during warm reboot are received') + else: + self.log('Verify that routes deleted during warm reboot are removed') + + log_info, fails = self.sad_handle.verify(pre_check=False, inboot=True) + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + self.log(" ") + + def check_postboot_sad_status(self): + self.log("Postboot checks:") + log_info, fails = self.sad_handle.verify(pre_check=False, inboot=False) + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + self.log(" ") + + def sad_revert(self): + self.log("Revert to preboot state:") + log_info, fails = self.sad_handle.revert() + self.populate_fail_info(fails) + for log in log_info: + self.log(log) + self.log(" ") + def setUp(self): self.fails['dut'] = set() self.port_indices = self.read_port_indices() self.portchannel_ports = self.read_portchannel_ports() self.vlan_ports = 
self.read_vlan_ports() - if self.test_params['preboot_oper'] is not None: + if self.sad_oper: self.build_peer_mapping() self.test_params['vlan_if_port'] = self.build_vlan_if_port_mapping() @@ -399,7 +471,6 @@ def setUp(self): self.limit = datetime.timedelta(seconds=self.test_params['reboot_limit_in_seconds']) self.reboot_type = self.test_params['reboot_type'] - self.preboot_oper = self.test_params['preboot_oper'] if self.reboot_type not in ['fast-reboot', 'warm-reboot']: raise ValueError('Not supported reboot_type %s' % self.reboot_type) self.dut_ssh = self.test_params['dut_username'] + '@' + self.test_params['dut_hostname'] @@ -415,18 +486,7 @@ def setUp(self): self.ssh_targets.append(vm) self.log("Converted addresses VMs: %s" % str(self.ssh_targets)) - if self.preboot_oper is not None: - self.log("Preboot Operations:") - self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports) - (self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.pre_handle.setup() - self.populate_fail_info(fails) - for log in log_info: - self.log(log) - log_info, fails = self.pre_handle.verify() - self.populate_fail_info(fails) - for log in log_info: - self.log(log) - self.log(" ") + self.init_sad_oper() self.vlan_host_map = self.generate_vlan_servers() arp_responder_conf = self.generate_arp_responder_conf(self.vlan_host_map) @@ -458,7 +518,7 @@ def setUp(self): self.generate_arp_ping_packet() if self.reboot_type == 'warm-reboot': - self.log(self.get_preboot_info()) + self.log(self.get_sad_info()) # Pre-generate list of packets to be sent in send_in_background method. 
generate_start = datetime.datetime.now() @@ -706,6 +766,9 @@ def runTest(self): if self.reboot_type == 'fast-reboot': self.light_probe = True + else: + # add or del routes during boot + self.do_inboot_oper() self.reboot_start = datetime.datetime.now() self.log("Dut reboots: reboot start %s" % str(self.reboot_start)) @@ -803,13 +866,13 @@ def wait_for_ssh_threads(): self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) if self.reboot_type == 'warm-reboot': - if self.preboot_oper is not None and self.pre_handle is not None: - self.log("Postboot checks:") - log_info, fails = self.pre_handle.verify(pre_check=False) - self.populate_fail_info(fails) - for log in log_info: - self.log(log) - self.log(" ") + # after the data plane is up, check for routing changes + if self.test_params['inboot_oper'] and self.sad_handle: + self.check_inboot_sad_status() + + # postboot check for all preboot operations + if self.test_params['preboot_oper'] and self.sad_handle: + self.check_postboot_sad_status() else: # verify there are no interface flaps after warm boot @@ -822,9 +885,10 @@ def wait_for_ssh_threads(): self.watching = False # revert to pretest state - if self.preboot_oper is not None and self.pre_handle is not None: - self.log("Revert to preboot state:") - self.pre_handle.revert() + if self.sad_oper and self.sad_handle: + self.sad_revert() + if self.test_params['inboot_oper']: + self.check_postboot_sad_status() self.log(" ") # Generating report @@ -1018,7 +1082,7 @@ def sniff_in_background(self, wait = None): self.sniffer_started.clear() def save_sniffed_packets(self): - filename = "/tmp/capture_%s.pcap" % self.preboot_oper if self.preboot_oper is not None else "/tmp/capture.pcap" + filename = "/tmp/capture_%s.pcap" % self.sad_oper if self.sad_oper is not None else "/tmp/capture.pcap" if self.packets: scapyall.wrpcap(filename, self.packets) self.log("Pcap file dumped to %s" % filename) @@ -1152,7 
+1216,7 @@ def examine_flow(self, filename = None): self.log("Gaps in forwarding not found.") self.log("Total incoming packets captured %d" % received_counter) if packets: - filename = '/tmp/capture_filtered.pcap' if self.preboot_oper is None else "/tmp/capture_filtered_%s.pcap" % self.preboot_oper + filename = '/tmp/capture_filtered.pcap' if self.sad_oper is None else "/tmp/capture_filtered_%s.pcap" % self.sad_oper scapyall.wrpcap(filename, packets) self.log("Filtered pcap dumped to %s" % filename) diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index db967eb0bda..7bab31fff52 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -363,6 +363,12 @@ def get_bgp_info(self): return neigh_bgp, dut_bgp + def change_bgp_route(self, cfg_map): + self.do_cmd('configure') + for item in cfg_map: + self.do_cmd(item) + self.do_cmd('exit') + def change_bgp_neigh_state(self, asn, is_up=True): state = ['shut', 'no shut'] self.do_cmd('configure') @@ -519,4 +525,3 @@ def check_change_time(self, output, entity, what): # Note: the first item is a placeholder return 0, change_count - diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index bda06a49265..8fcb5b7db50 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -1,4 +1,5 @@ import datetime +import ipaddress import re import subprocess import time @@ -6,7 +7,7 @@ from arista import Arista -class PrebootTest(object): +class SadTest(object): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports): self.oper_type = oper_type self.vm_list = vm_list @@ -24,9 +25,15 @@ def setup(self): self.shandle.sad_setup(is_up=False) return self.shandle.retreive_test_info(), self.shandle.retreive_logs() - def verify(self, pre_check=True): + def route_setup(self): + 
self.shandle.modify_routes() + return self.shandle.retreive_logs() + + def verify(self, pre_check=True, inboot=False): if 'vlan' in self.oper_type: self.shandle.verify_vlan_port_state(pre_check=pre_check) + elif 'routing' in self.oper_type: + self.shandle.verify_route_add(pre_check=pre_check, inboot=inboot) else: self.shandle.sad_bgp_verify() if 'lag' in self.oper_type: @@ -41,8 +48,9 @@ def revert(self): class SadPath(object): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports): self.oper_type = '' - self.cnt = 1 self.memb_cnt = 0 + self.cnt = 1 if 'routing' not in oper_type else len(vm_list) + self.ip_cnt = 1 self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map @@ -61,18 +69,31 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, self.memb_index = 0 self.if_port = [] self.down_vlan_info = [] + self.bp_ip = None + self.bp_ip6 = None self.extract_oper_info(oper_type) + self.extract_nexthops() + + def extract_nexthops(self): + if self.test_args['nexthop_ips']: + self.bp_ip = str(self.test_args['nexthop_ips'][0]) + self.bp_ip6 = str(self.test_args['nexthop_ips'][1]) def extract_oper_info(self, oper_type): if oper_type and ':' in oper_type: temp = oper_type.split(':') self.oper_type = temp[0] - # get number of VMs where the sad pass oper needs to be done. For vlan_member case, + # get number of VMs where the preboot sad pass oper needs to be done. 
For vlan_member case, # this will be the number of down vlan ports - self.cnt = int(temp[1]) - if len(temp) > 2: - # get the number of lag members in a portchannel that should be brought down - self.memb_cnt = int(temp[-1]) + if 'routing' not in oper_type: + self.cnt = int(temp[1]) + if len(temp) > 2: + # get the number of lag members in a portchannel that should be brought down + self.memb_cnt = int(temp[-1]) + else: + # for sad operation during reboot, all VMs should be included in the cnt + self.cnt = len(self.vm_list) + self.ip_cnt = int(temp[-1]) else: self.oper_type = oper_type @@ -152,8 +173,10 @@ def down_vlan_ports(self): def setup(self): self.select_vm() self.get_neigh_name() - self.down_neigh_port() self.vm_connect() + # bring down the VM PTF ports only for preboot sad oper + if 'routing' not in self.oper_type: + self.down_neigh_port() # decide if its all member down or few members down for lag member oper type if 'member' in self.oper_type: @@ -221,7 +244,28 @@ def sad_setup(self, is_up=True): if 'lag' in self.oper_type: self.populate_lag_state() - if 'bgp' in self.oper_type: + elif 'routing' in self.oper_type: + if self.bp_ip and self.bp_ip6: + self.generate_ips() + self.build_route_config() + neigh_rt_v4_info, ret = self.get_bgp_route_cnt(is_up=is_up) + neigh_rt_v6_info, ret1 = self.get_bgp_route_cnt(is_up=is_up, v4=False) + if not ret and not ret1: + self.build_neigh_rt_map(neigh_rt_v4_info + neigh_rt_v6_info) + + if 'routing' in self.oper_type: + if self.bp_ip: + for vm in self.neigh_vms: + if not is_up: + # Need to add the routes which will be removed during the the boot + if 'routing_del' in self.oper_type: + self.log.append('Adding %d routes from VM %s' % (2 * self.ip_cnt, vm)) + self.vm_handles[vm].change_bgp_route(self.route_cfg) + else: + self.log.append('Removing %d routes from VM %s' % (2 * self.ip_cnt, vm)) + self.vm_handles[vm].change_bgp_route(self.no_route_cfg) + + elif 'bgp' in self.oper_type: self.log.append('BGP state change will be 
for %s' % ", ".join(self.neigh_vms)) if self.oper_type == 'neigh_bgp_down': for vm in self.neigh_vms: @@ -254,6 +298,88 @@ def sad_setup(self, is_up=True): elif 'vlan' in self.oper_type: self.change_vlan_port_state(is_up=is_up) + def generate_ips(self): + ''' + Generates the prefixes that will be added to the neighbor + ''' + self.start_ip_pfx = '123.45.67.0/25' + self.start_ip6_pfx = '20d0:a808:0:80::/120' + self.ip_pfx_list = list(ipaddress.ip_network(u'%s' % self.start_ip_pfx).hosts())[0:self.ip_cnt] + self.ip_pfx_list = [str(ip) for ip in self.ip_pfx_list] + self.ip6_pfx_list = list(ipaddress.IPv6Network(u'%s' % self.start_ip6_pfx).hosts())[0:self.ip_cnt] + self.ip6_pfx_list = [str(ip) for ip in self.ip6_pfx_list] + + def build_route_config(self): + # cmds for adding routes + self.route_cfg = [] + # cmds for deleting routes + self.no_route_cfg = [] + for cnt, ip in enumerate(zip(self.ip_pfx_list, self.ip6_pfx_list)): + # add route cfg + self.route_cfg.append('ip route %s/32 %s' % (ip[0], self.bp_ip)) + self.route_cfg.append('ipv6 route %s/128 %s' % (ip[1], self.bp_ip6)) + # remove route cfg + self.no_route_cfg.append('no ip route %s/32 %s' % (ip[0], self.bp_ip)) + self.no_route_cfg.append('no ipv6 route %s/128 %s' % (ip[1], self.bp_ip6)) + self.route_cfg.append('router bgp %s' % self.neigh_bgps[self.neigh_vms[-1]]['asn']) + self.route_cfg.append('redistribute static') + self.route_cfg.append('exit') + self.no_route_cfg.append('router bgp %s' % self.neigh_bgps[self.neigh_vms[-1]]['asn']) + self.no_route_cfg.append('redistribute static route-map PREPENDAS') + self.no_route_cfg.append('exit') + + def get_bgp_route_cnt(self, is_up=True, v4=True): + # extract the neigh ip and current number of routes + if v4: + cmd = 'show ip bgp summary | sed \'1,/Neighbor/d;/^$/,$d\' | sed \'s/\s\s*/ /g\' | cut -d\' \' -f 1,10' + else: + cmd = 'show ipv6 bgp summary | sed \'1,/Neighbor/d;/^$/,$d\' | sed \'s/\s\s*/ /g\' | cut -d\' \' -f 1,10' + + stdout, stderr, return_code = 
self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, cmd]) + if return_code != 0: + self.fails['dut'].add('%s: Failed to retreive BGP route info from DUT' % self.msg_prefix[1 - is_up]) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + return stdout, return_code + + def build_neigh_rt_map(self, neigh_rt_info): + # construct neigh to route cnt map + self.neigh_rt_map = dict() + for line in neigh_rt_info.strip().split('\n'): + key, value = line.split(' ') + self.neigh_rt_map.update({key:value}) + + def verify_route_cnt(self, rt_incr, is_up=True, v4=True): + neigh_rt_info, ret = self.get_bgp_route_cnt(is_up=is_up, v4=v4) + if not ret: + for line in neigh_rt_info.strip().split('\n'): + neigh_ip, rt_cnt = line.split(' ') + exp_cnt = int(self.neigh_rt_map[neigh_ip]) + rt_incr + if int(rt_cnt) != exp_cnt: + self.fails['dut'].add('%s: Route cnt incorrect for neighbor %s Expected: %d Obtained: %d' % (self.msg_prefix[is_up], neigh_ip, exp_cnt, int(rt_cnt))) + else: + self.log.append('Route cnt as expected for neighbor %s: %d' % (neigh_ip, exp_cnt)) + + def verify_route_add(self, pre_check=True, inboot=True): + self.log = [] + rt_incr = 0 + if (pre_check and 'routing_del' in self.oper_type) or (inboot and 'routing_add' in self.oper_type): + rt_incr = self.ip_cnt + # verify ipv4 and ipv6 route cnts + self.verify_route_cnt(rt_incr, is_up=pre_check) + self.verify_route_cnt(rt_incr, is_up=pre_check, v4=False) + + def modify_routes(self): + self.log = [] + if self.bp_ip: + for vm in self.neigh_vms: + if 'routing_add' in self.oper_type: + self.log.append('Adding %d routes from VM %s' % (2 * self.ip_cnt, vm)) + self.vm_handles[vm].change_bgp_route(self.route_cfg) + else: + self.log.append('Removing %d routes from VM %s' % (2 * self.ip_cnt, vm)) + self.vm_handles[vm].change_bgp_route(self.no_route_cfg) + def change_vlan_port_state(self, is_up=True): state 
= ['shutdown', 'startup'] diff --git a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 810c9ec6a5b..4dd78e605e3 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -23,16 +23,30 @@ - name: Preboot-list initialization set_fact: preboot_list={% if preboot_list is not defined %}[None]{% else %}{{ preboot_list }}{% endif %} - - name: Validate preboot list - include: roles/test/tasks/advanced_reboot/validate_preboot_list.yml - with_items: "{{ preboot_list }}" + - name: Inboot-list initialization + set_fact: + inboot_list: {% if inboot_list is not defined %}[None]{% else %}{{ inboot_list }}{% endif %} + nexthop_ips: None + + - include_vars: "vars/topo_{{testbed_type}}.yml" + + - name: Set nexthop parameters for inboot operation + set_fact: + nexthop_ips: + - "{{ configuration_properties['common']['nhipv4'] }}" + - "{{ configuration_properties['common']['nhipv6'] }}" + when: None not in inboot_list + + - name: Validate preboot and inboot list + include: roles/test/tasks/advanced_reboot/validate_sad_list.yml + with_items: "{{ preboot_list + inboot_list }}" when: item and ':' in item - name: Preboot files initialization set_fact: preboot_files={% if preboot_files is not defined %}None{% else %}{{ preboot_files }}{% endif %} - debug: - msg: "Preboot-list: {{ preboot_list }} Preboot-files: {{ preboot_files }}" + msg: "Preboot-list: {{ preboot_list }} Preboot-files: {{ preboot_files }} Inboot-list: {{ inboot_list }}" - name: Set PTF test params set_fact: @@ -122,7 +136,8 @@ dest: /tmp/neigh_port_info.json delegate_to: "{{ ptf_host }}" - when: preboot_list|length > 1 + when: (preboot_list|length > 1) or + (inboot_list|length > 0 and 'None' not in inboot_list) - debug: msg="Defined new sonic image url is {{ new_sonic_image }}" when: new_sonic_image is defined @@ -142,8 +157,20 @@ - reboot_type == "fast-reboot" - minigraph_hwsku is defined and minigraph_hwsku in 
mellanox_hwskus + - name: populate sad list with inboot list + set_fact: sad_list="{{ inboot_list }}" + when: (inboot_list|length > 0) and (None not in inboot_list) + + - name: populate sad list with preboot list + set_fact: sad_list="{{ preboot_list }}" + when: (preboot_list|length > 0) and (None not in preboot_list) + + - name: populate sad list with preboot list + set_fact: sad_list="{{ preboot_list }}" + when: (None in preboot_list) and (None in inboot_list) + - include: ptf_runner_reboot.yml - with_items: "{{ preboot_list }}" + with_items: "{{ sad_list }}" always: # When new image is defined, test removed /host/config_db.json diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_sad_list.yml similarity index 83% rename from ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml rename to ansible/roles/test/tasks/advanced_reboot/validate_sad_list.yml index d0c59eae24b..b8cde6a18c6 100644 --- a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml +++ b/ansible/roles/test/tasks/advanced_reboot/validate_sad_list.yml @@ -3,6 +3,7 @@ host_max_len: "{{ vm_hosts|length - 1 }}" member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" vlan_max_cnt: "{{ minigraph_vlans.values()[0]['members']|length - 1 }}" + host_max_cnt: 126 - fail: msg="Bgp neigh down count is greater than or equal to number of VM hosts. Current val = {{ item_cnt }} Max val = {{ host_max_len }}" when: "{{ 'bgp_down' in item and item_cnt > host_max_len }}" @@ -15,3 +16,6 @@ - fail: msg="Vlan count is greater than or equal to number of Vlan interfaces. Current val = {{ item_cnt }} Max val = {{ vlan_max_cnt }}" when: "{{ 'vlan_port_down' in item and item_cnt|int > vlan_max_cnt|int }}" + +- fail: msg="Number of prefixes is greater than allowed max. 
Current val = {{ item_cnt }} Max val = {{ host_max_cnt }}" + when: "{{ 'routing' in item and item_cnt|int > host_max_cnt }}" diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index b3d9004c965..36396652dbc 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -1,4 +1,23 @@ - block: + - name: Default values for preboot and inboot type + set_fact: + preboot_oper: None + inboot_oper: None + + - name: Populate inboot var when it is of inboot type + set_fact: + inboot_oper: "{{ item }}" + when: + - item and item != 'None' + - "{{ 'routing' in item }}" + + - name: Populate preboot var when it is of preboot type + set_fact: + preboot_oper: "{{ item }}" + when: + - item and item != 'None' + - "{{ 'routing' not in item }}" + - name: Copy arp responder to the PTF container copy: src=roles/test/files/helpers/arp_responder.py dest=/opt delegate_to: "{{ ptf_host }}" @@ -46,14 +65,16 @@ - lo_v6_prefix='{{ lo_v6_prefix }}' - arista_vms=\"['{{ vm_hosts | list | join("','") }}']\" - preboot_files='{{ preboot_files }}' - - preboot_oper='{{ item }}' + - preboot_oper='{{ preboot_oper }}' + - inboot_oper='{{ inboot_oper }}' + - nexthop_ips={{ nexthop_ips }} - allow_vlan_flooding='{{ allow_vlan_flooding }}' - sniff_time_incr={{ sniff_time_incr }} - setup_fdb_before_test=True always: - - name: Set all the filename vars when there is no preboot type + - name: Set all the filename vars when there is no preboot/inboot type set_fact: reboot_log: '/tmp/{{reboot_type}}.log' capture_pcap: '/tmp/capture.pcap' @@ -63,7 +84,7 @@ swss_rec: '/tmp/swss.rec' when: not item or item == 'None' - - name: Set all the filename vars when there is a preboot type + - name: Set all the filename vars when there is a preboot/inboot type set_fact: reboot_log: '/tmp/{{reboot_type}}-{{item}}.log' capture_pcap: '/tmp/capture_{{item}}.pcap' @@ -126,4 +147,5 @@ - name: Wait for the DUT to be ready 
for the next test pause: seconds=420 - when: preboot_list|length > 1 + when: (preboot_list|length > 1) or + (inboot_list|length > 0 and 'None' not in inboot_list) diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad-inboot.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad-inboot.yml new file mode 100644 index 00000000000..915e6aa8157 --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-multi-sad-inboot.yml @@ -0,0 +1,16 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +# inboot list format: 'inboot_oper:route_cnt' +- name: Set sad operation during warm boot + set_fact: + in_list: ['routing_del:50', 'routing_add:50'] + +- name: Warm-reboot test + include: advanced-reboot.yml + vars: + reboot_type: warm-reboot + inboot_list: "{{ in_list }}" + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 73f67669ec1..ff181c4a274 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -101,6 +101,13 @@ testcases: ptf_host: vm_hosts: + warm-reboot-multi-sad-inboot: + filename: warm-reboot-multi-sad-inboot.yml + topologies: [t0, t0-64, t0-64-32, t0-116, t0-56] + required_vars: + ptf_host: + vm_hosts: + fib: filename: simple-fib.yml topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] From dc35040f1c1e2e3bd43817ae5956ab5589b27bf6 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Thu, 17 Oct 2019 17:25:14 -0700 Subject: [PATCH 135/218] pfcwd warm-reboot: sleep to allow control-plane message to flow into (#1161) Signed-off-by: Wenda Ni --- .../pfc_wd/functional_test/functional_test_restore_perq.yml | 6 +++++- .../pfc_wd/functional_test/functional_test_storm_perq.yml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml 
b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml index e7b1e751b0d..c123eed51ef 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_restore_perq.yml @@ -14,6 +14,10 @@ vars: testname_unique_gen: true + - name: Allow enough time for the start marker to flow into the syslog + pause: + seconds: 5 + - name: Stop PFC storm on fanout switch action: apswitch template="{{pfc_wd_storm_stop_template}}" args: @@ -23,7 +27,7 @@ - name: Allow enough time for the PFC storm restoration to flow into the syslog pause: - seconds: 1 + seconds: 15 - name: Check if logs contain message that PFC WD restored from deadlock include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml index 5453542773a..ca4d630cea7 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test_storm_perq.yml @@ -41,7 +41,7 @@ - name: Allow enough time for the PFC storm detection to flow into the syslog pause: - seconds: 5 + seconds: 15 - name: Check if logs contain message that PFC WD detected storm include: roles/test/files/tools/loganalyzer/loganalyzer_analyze.yml From 46184c56fc670f2184438cdb1076b7db8604c8a6 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Sun, 20 Oct 2019 17:04:04 -0700 Subject: [PATCH 136/218] Extend tx enable/disable to support port id list (#1168) Signed-off-by: Wenda Ni --- .../test/files/saitests/sai_qos_tests.py | 112 ++++-------------- ansible/roles/test/files/saitests/switch.py | 10 +- 2 files changed, 32 insertions(+), 90 deletions(-) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 
07486c04666..981516b050e 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -80,17 +80,7 @@ def runTest(self): asic_type = self.test_params['sonic_asic_type'] - if asic_type == 'mellanox': - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - - for port in sai_port_list: - self.client.sai_thrift_set_port_attribute(port, attr) + sai_thrift_port_tx_enable(self.client, asic_type, port_list.keys()) # DSCP to queue mapping class DscpMappingPB(sai_base_test.ThriftInterfaceDataPlane): @@ -528,7 +518,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 2 - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) try: # send packets short of triggering pfc @@ -595,7 +585,7 @@ def runTest(self): assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # This test looks to measure xon threshold (pg_reset_floor) class PFCXonTest(sai_base_test.ThriftInterfaceDataPlane): @@ -647,21 +637,7 @@ def runTest(self): # the ingress may not trigger PFC sharp at its boundary margin = 1 - if asic_type == 'mellanox': - # Stop function of dst xmit ports - sched_prof_id = sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = 
sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) - else: - # Pause egress of dut xmit ports - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id, dst_port_2_id, dst_port_3_id]) try: # send packets to dst port 0 @@ -709,7 +685,7 @@ def runTest(self): assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_2_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_2_id]) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) @@ -729,7 +705,7 @@ def runTest(self): assert(xmit_2_counters[EGRESS_DROP] == xmit_2_counters_base[EGRESS_DROP]) assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_3_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_3_id]) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) @@ -756,21 +732,7 @@ def runTest(self): assert(xmit_3_counters[EGRESS_DROP] == xmit_3_counters_base[EGRESS_DROP]) finally: - if asic_type == 'mellanox': - # Release dst ports - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = 
sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) - else: - # Resume egress of dut xmit ports - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_3_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id, dst_port_2_id, dst_port_3_id]) class HdrmPoolSizeTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): @@ -812,7 +774,7 @@ def runTest(self): xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) # Pause egress of dut xmit port - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) try: # send packets to leak out @@ -926,7 +888,7 @@ def runTest(self): sys.stderr.flush() finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # TODO: remove sai_thrift_clear_all_counters and change to use incremental counter values class DscpEcnSend(sai_base_test.ThriftInterfaceDataPlane): @@ -1084,7 +1046,7 @@ def runTest(self): limit = int(self.test_params['limit']) pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) # Send packets to leak out pkt = simple_tcp_packet(pktlen=64, @@ -1195,7 
+1157,7 @@ def runTest(self): p.socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 41943040) # Release port - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) cnt = 0 pkts = [] @@ -1292,18 +1254,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 2 - if asic_type == 'mellanox': - # Stop port function - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, STOP_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - else: - # Pause egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=0) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) try: # send packets short of triggering egress drop @@ -1337,18 +1288,7 @@ def runTest(self): assert(xmit_counters[EGRESS_DROP] > xmit_counters_base[EGRESS_DROP]) finally: - if asic_type == 'mellanox': - # Release ports - sched_prof_id=sai_thrift_create_scheduler_profile(self.client, RELEASE_PORT_MAX_RATE) - attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) - self.client.sai_thrift_set_port_attribute(port_list[dst_port_2_id], attr) - else: - # Resume egress of dut xmit port - attr_value = sai_thrift_attribute_value_t(booldata=1) - attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) - 
self.client.sai_thrift_set_port_attribute(port_list[dst_port_id], attr) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # pg shared pool applied to both lossy and lossless traffic class PGSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): @@ -1392,7 +1332,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 2 - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) # send packets try: @@ -1446,7 +1386,7 @@ def runTest(self): assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # pg headroom is a notion for lossless traffic only class PGHeadroomWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): @@ -1490,7 +1430,7 @@ def runTest(self): # or the leak out is simply less than expected as we have occasionally observed margin = 0 - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) # send packets try: @@ -1531,7 +1471,7 @@ def runTest(self): assert(pg_headroom_wm_res[pg] == expected_wm * cell_size) finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) class QSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): @@ -1578,7 +1518,7 @@ def runTest(self): # shared test, so the margin here actually means extra capacity margin margin = 8 - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) # send packets try: @@ -1627,7 +1567,7 @@ def runTest(self): assert(q_wm_res[queue] <= (expected_wm + margin) * cell_size) finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + 
sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # TODO: buffer pool roid should be obtained via rpc calls # based on the pg or queue index @@ -1701,10 +1641,10 @@ def runTest(self): # the impact caused by lossy traffic # # TH2 uses scheduler-based TX enable, this does not require sending packets to leak out - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) pkts_num_to_send += (pkts_num_leak_out + pkts_num_fill_min) send_packet(self, src_port_id, pkt, pkts_num_to_send) - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, buffer_pool_wm) @@ -1731,10 +1671,10 @@ def runTest(self): expected_wm = total_shared print >> sys.stderr, "pkts num to send: %d, total pkts: %d, shared: %d" % (pkts_num, expected_wm, total_shared) - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) pkts_num_to_send += pkts_num send_packet(self, src_port_id, pkt, pkts_num_to_send) - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) print >> sys.stderr, "lower bound (-%d): %d, actual value: %d, upper bound (+%d): %d" % (lower_bound_margin, (expected_wm - lower_bound_margin)* cell_size, buffer_pool_wm, upper_bound_margin, (expected_wm + upper_bound_margin) * cell_size) @@ -1744,10 +1684,10 @@ def runTest(self): pkts_num = pkts_inc # overflow the shared pool - sai_thrift_port_tx_disable(self.client, asic_type, dst_port_id) + 
sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) pkts_num_to_send += pkts_num send_packet(self, src_port_id, pkt, pkts_num_to_send) - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), buffer_pool_wm) @@ -1756,4 +1696,4 @@ def runTest(self): assert(buffer_pool_wm <= (expected_wm + extra_cap_margin) * cell_size) finally: - sai_thrift_port_tx_enable(self.client, asic_type, dst_port_id) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) diff --git a/ansible/roles/test/files/saitests/switch.py b/ansible/roles/test/files/saitests/switch.py index 26b3851743c..4653fa2a191 100644 --- a/ansible/roles/test/files/saitests/switch.py +++ b/ansible/roles/test/files/saitests/switch.py @@ -617,30 +617,32 @@ def sai_thrift_clear_all_counters(client): for queue in queue_list: client.sai_thrift_clear_queue_stats(queue,cnt_ids,len(cnt_ids)) -def sai_thrift_port_tx_disable(client, asic_type, port_id): +def sai_thrift_port_tx_disable(client, asic_type, port_ids): if asic_type == 'mellanox': # Close DST port sched_prof_id = sai_thrift_create_scheduler_profile(client, STOP_PORT_MAX_RATE) attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - client.sai_thrift_set_port_attribute(port_list[port_id], attr) else: # Pause egress of dut xmit port attr_value = sai_thrift_attribute_value_t(booldata=0) attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + + for port_id in port_ids: client.sai_thrift_set_port_attribute(port_list[port_id], attr) -def sai_thrift_port_tx_enable(client, asic_type, port_id): +def 
sai_thrift_port_tx_enable(client, asic_type, port_ids): if asic_type == 'mellanox': # Release port sched_prof_id = sai_thrift_create_scheduler_profile(client, RELEASE_PORT_MAX_RATE) attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_QOS_SCHEDULER_PROFILE_ID, value=attr_value) - client.sai_thrift_set_port_attribute(port_list[port_id], attr) else: # Resume egress of dut xmit port attr_value = sai_thrift_attribute_value_t(booldata=1) attr = sai_thrift_attribute_t(id=SAI_PORT_ATTR_PKT_TX_ENABLE, value=attr_value) + + for port_id in port_ids: client.sai_thrift_set_port_attribute(port_list[port_id], attr) def sai_thrift_read_port_counters(client,port): From 41966160c5acc34f91e28f8d0375575827d55885 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Tue, 22 Oct 2019 14:55:48 -0700 Subject: [PATCH 137/218] Sleep to allow counter change propagate to COUNTERS_DB (#1174) Signed-off-by: Wenda Ni --- ansible/roles/test/files/saitests/sai_qos_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 981516b050e..6b135f8faee 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -139,6 +139,7 @@ def runTest(self): continue # Read Counters + time.sleep(10) port_results, queue_results = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) print >> sys.stderr, map(operator.sub, queue_results, queue_results_base) From ca7d19a7563005ba9711aa54a03b26ed90a8f1e5 Mon Sep 17 00:00:00 2001 From: Mykola F <37578614+mykolaf@users.noreply.github.com> Date: Thu, 24 Oct 2019 01:09:05 +0300 Subject: [PATCH 138/218] [advanced-reboot] fix syntax error (#1176) Change-Id: I899e7fb2215de90214980e75c3636ef8f3490e09 Signed-off-by: Mykola Faryma --- ansible/roles/test/tasks/advanced-reboot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ansible/roles/test/tasks/advanced-reboot.yml b/ansible/roles/test/tasks/advanced-reboot.yml index 4dd78e605e3..69d6258479a 100644 --- a/ansible/roles/test/tasks/advanced-reboot.yml +++ b/ansible/roles/test/tasks/advanced-reboot.yml @@ -25,7 +25,7 @@ - name: Inboot-list initialization set_fact: - inboot_list: {% if inboot_list is not defined %}[None]{% else %}{{ inboot_list }}{% endif %} + inboot_list: "{% if inboot_list is not defined %}[None]{% else %}{{ inboot_list }}{% endif %}" nexthop_ips: None - include_vars: "vars/topo_{{testbed_type}}.yml" From 30df644c5d91dfcc260acca5314e8af0ebea0b3f Mon Sep 17 00:00:00 2001 From: Neetha John Date: Wed, 23 Oct 2019 16:49:30 -0700 Subject: [PATCH 139/218] =?UTF-8?q?=E3=80=90reboot]:=20Fix=20the=20wait=20?= =?UTF-8?q?condition=20(#1178)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Neetha John --- ansible/roles/test/tasks/ptf_runner_reboot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 36396652dbc..3026bdd3d63 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -147,5 +147,5 @@ - name: Wait for the DUT to be ready for the next test pause: seconds=420 - when: (preboot_list|length > 1) or - (inboot_list|length > 0 and 'None' not in inboot_list) + when: (preboot_list|length > 0 and None not in preboot_list) or + (inboot_list|length > 0 and None not in inboot_list) From a6846377f77da6c366ed16347c2b9be799f47a71 Mon Sep 17 00:00:00 2001 From: bbinxie Date: Sat, 26 Oct 2019 06:55:10 +0800 Subject: [PATCH 140/218] update sensors path for seastone (#1155) remove and change some sensors path --- ansible/group_vars/sonic/sku-sensors-data.yml | 160 +++--------------- 1 file changed, 26 insertions(+), 134 deletions(-) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml 
b/ansible/group_vars/sonic/sku-sensors-data.yml index c4b01b23b45..f740f87c59a 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -1501,6 +1501,7 @@ sensors_checks: side: left skip_list: - ym2651-i2c-11-5b + Seastone-DX010: alarms: fan: @@ -1532,66 +1533,30 @@ sensors_checks: - dps460-i2c-10-5a/iin/curr1_crit_alarm - dps460-i2c-10-5a/iin/curr1_max_alarm - dps460-i2c-10-5a/iout1/curr2_crit_alarm - - dps460-i2c-10-5a/iout1/curr2_lcrit_alarm - dps460-i2c-10-5a/iout1/curr2_max_alarm - dps460-i2c-10-5a/pin/power1_alarm - dps460-i2c-10-5a/pout1/power2_cap_alarm - dps460-i2c-10-5a/pout1/power2_crit_alarm - dps460-i2c-10-5a/pout1/power2_max_alarm - - dps460-i2c-10-5a/vin/in1_crit_alarm - - dps460-i2c-10-5a/vin/in1_lcrit_alarm - - dps460-i2c-10-5a/vin/in1_max_alarm - - dps460-i2c-10-5a/vin/in1_min_alarm - - dps460-i2c-10-5a/vout1/in3_crit_alarm - - dps460-i2c-10-5a/vout1/in3_lcrit_alarm - - dps460-i2c-10-5a/vout1/in3_max_alarm - - dps460-i2c-10-5a/vout1/in3_min_alarm - dps460-i2c-11-5b/iin/curr1_crit_alarm - dps460-i2c-11-5b/iin/curr1_max_alarm - dps460-i2c-11-5b/iout1/curr2_crit_alarm - - dps460-i2c-11-5b/iout1/curr2_lcrit_alarm - dps460-i2c-11-5b/iout1/curr2_max_alarm - dps460-i2c-11-5b/pin/power1_alarm - - dps460-i2c-11-5b/pout1/power2_cap_alarm - - dps460-i2c-11-5b/pout1/power2_crit_alarm - dps460-i2c-11-5b/pout1/power2_max_alarm - - dps460-i2c-11-5b/vin/in1_crit_alarm - - dps460-i2c-11-5b/vin/in1_lcrit_alarm - - dps460-i2c-11-5b/vin/in1_max_alarm - - dps460-i2c-11-5b/vin/in1_min_alarm - dps460-i2c-11-5b/vout1/in3_crit_alarm - dps460-i2c-11-5b/vout1/in3_lcrit_alarm - - dps460-i2c-11-5b/vout1/in3_max_alarm - - dps460-i2c-11-5b/vout1/in3_min_alarm temp: - coretemp-isa-0000/Core 0/temp2_crit_alarm - coretemp-isa-0000/Core 1/temp3_crit_alarm - coretemp-isa-0000/Core 2/temp4_crit_alarm - coretemp-isa-0000/Core 3/temp5_crit_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_crit_alarm - - 
dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_lcrit_alarm - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_max_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_min_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_crit_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_lcrit_alarm - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_max_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_min_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_crit_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_lcrit_alarm - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_max_alarm - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_min_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_crit_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_lcrit_alarm - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_max_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_min_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_crit_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_lcrit_alarm - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_max_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_min_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_crit_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_lcrit_alarm - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_max_alarm - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_min_alarm compares: fan: [] power: @@ -1601,8 +1566,6 @@ sensors_checks: - dps460-i2c-10-5a/iin/curr1_max - - dps460-i2c-10-5a/iout1/curr2_input - dps460-i2c-10-5a/iout1/curr2_crit - - - dps460-i2c-10-5a/iout1/curr2_lcrit - - dps460-i2c-10-5a/iout1/curr2_input - - dps460-i2c-10-5a/iout1/curr2_input - dps460-i2c-10-5a/iout1/curr2_max - - dps460-i2c-10-5a/pin/power1_input @@ -1611,30 +1574,12 @@ sensors_checks: - dps460-i2c-10-5a/pout1/power2_crit - - dps460-i2c-10-5a/pout1/power2_input 
- dps460-i2c-10-5a/pout1/power2_max - - - dps460-i2c-10-5a/vin/in1_input - - dps460-i2c-10-5a/vin/in1_crit - - - dps460-i2c-10-5a/vin/in1_lcrit - - dps460-i2c-10-5a/vin/in1_input - - - dps460-i2c-10-5a/vin/in1_input - - dps460-i2c-10-5a/vin/in1_max - - - dps460-i2c-10-5a/vin/in1_min - - dps460-i2c-10-5a/vin/in1_input - - - dps460-i2c-10-5a/vout1/in3_input - - dps460-i2c-10-5a/vout1/in3_crit - - - dps460-i2c-10-5a/vout1/in3_lcrit - - dps460-i2c-10-5a/vout1/in3_input - - - dps460-i2c-10-5a/vout1/in3_input - - dps460-i2c-10-5a/vout1/in3_max - - - dps460-i2c-10-5a/vout1/in3_min - - dps460-i2c-10-5a/vout1/in3_input - - dps460-i2c-11-5b/iin/curr1_input - dps460-i2c-11-5b/iin/curr1_crit - - dps460-i2c-11-5b/iin/curr1_input - dps460-i2c-11-5b/iin/curr1_max - - dps460-i2c-11-5b/iout1/curr2_input - dps460-i2c-11-5b/iout1/curr2_crit - - - dps460-i2c-11-5b/iout1/curr2_lcrit - - dps460-i2c-11-5b/iout1/curr2_input - - dps460-i2c-11-5b/iout1/curr2_input - dps460-i2c-11-5b/iout1/curr2_max - - dps460-i2c-11-5b/pin/power1_input @@ -1643,22 +1588,6 @@ sensors_checks: - dps460-i2c-11-5b/pout1/power2_crit - - dps460-i2c-11-5b/pout1/power2_input - dps460-i2c-11-5b/pout1/power2_max - - - dps460-i2c-11-5b/vin/in1_input - - dps460-i2c-11-5b/vin/in1_crit - - - dps460-i2c-11-5b/vin/in1_lcrit - - dps460-i2c-11-5b/vin/in1_input - - - dps460-i2c-11-5b/vin/in1_input - - dps460-i2c-11-5b/vin/in1_max - - - dps460-i2c-11-5b/vin/in1_min - - dps460-i2c-11-5b/vin/in1_input - - - dps460-i2c-11-5b/vout1/in3_input - - dps460-i2c-11-5b/vout1/in3_crit - - - dps460-i2c-11-5b/vout1/in3_lcrit - - dps460-i2c-11-5b/vout1/in3_input - - - dps460-i2c-11-5b/vout1/in3_input - - dps460-i2c-11-5b/vout1/in3_max - - - dps460-i2c-11-5b/vout1/in3_min - - dps460-i2c-11-5b/vout1/in3_input temp: - - coretemp-isa-0000/Core 0/temp2_input - coretemp-isa-0000/Core 0/temp2_crit @@ -1676,74 +1605,38 @@ sensors_checks: - coretemp-isa-0000/Core 3/temp5_crit - - coretemp-isa-0000/Core 3/temp5_input - coretemp-isa-0000/Core 
3/temp5_max - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_crit - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_lcrit - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_input - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_max - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_min - - dps460-i2c-10-5a/Power Supply 1 temp sensor 1/temp1_input - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_crit - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_lcrit - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_input - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_max - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_min - - dps460-i2c-10-5a/Power Supply 1 temp sensor 2/temp2_input - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_crit - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_lcrit - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_input - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_input - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_max - - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_min - - dps460-i2c-10-5a/Power Supply 1 temp sensor 3/temp3_input - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_crit - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_lcrit - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_input - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_max - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_min - - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_input - - - 
dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_crit - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_lcrit - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_input - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_max - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_min - - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_input - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_crit - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_lcrit - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_input - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_input - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_max - - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_min - - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_input - - - dx010_lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input - - dx010_lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_max - - - dx010_lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input - - dx010_lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_max_hyst - - - dx010_lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_max - - - dx010_lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_max_hyst - - - dx010_lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input - - dx010_lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_max - - - dx010_lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input - - dx010_lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_max_hyst - - - dx010_lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_max - - - dx010_lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-6-49/Front-panel temp sensor 
2/temp1_max_hyst - - - dx010_lm75b-i2c-7-4a/ASIC temp sensor/temp1_input - - dx010_lm75b-i2c-7-4a/ASIC temp sensor/temp1_max - - - dx010_lm75b-i2c-7-4a/ASIC temp sensor/temp1_input - - dx010_lm75b-i2c-7-4a/ASIC temp sensor/temp1_max_hyst + - - lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_max + - - lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_max_hyst + - - lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input + - lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_max + - - lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input + - lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_max_hyst + - - lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_max + - - lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_max_hyst + - - lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input + - lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_max + - - lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input + - lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_max_hyst + - - lm75b-i2c-7-4a/ASIC temp sensor/temp1_input + - lm75b-i2c-7-4a/ASIC temp sensor/temp1_max + - - lm75b-i2c-7-4a/ASIC temp sensor/temp1_input + - lm75b-i2c-7-4a/ASIC temp sensor/temp1_max_hyst non_zero: fan: - dps460-i2c-10-5a/fan1/fan1_input @@ -1764,7 +1657,6 @@ sensors_checks: - dps460-i2c-10-5a/pin/power1_input - dps460-i2c-10-5a/pout1/power2_input - dps460-i2c-10-5a/vin/in1_input - - dps460-i2c-10-5a/vout1/in3_input - dps460-i2c-11-5b/iin/curr1_input - dps460-i2c-11-5b/iout1/curr2_input - dps460-i2c-11-5b/pin/power1_input @@ -1782,11 +1674,11 @@ sensors_checks: - dps460-i2c-11-5b/Power Supply 2 temp sensor 1/temp1_input - dps460-i2c-11-5b/Power Supply 2 temp sensor 2/temp2_input - dps460-i2c-11-5b/Power Supply 2 temp sensor 3/temp3_input - - dx010_lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input - - 
dx010_lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input - - dx010_lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input - - dx010_lm75b-i2c-7-4a/ASIC temp sensor/temp1_input + - lm75b-i2c-14-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-15-4e/Rear-panel temp sensor 2/temp1_input + - lm75b-i2c-5-48/Rear-panel temp sensor 1/temp1_input + - lm75b-i2c-6-49/Front-panel temp sensor 2/temp1_input + - lm75b-i2c-7-4a/ASIC temp sensor/temp1_input psu_skips: {} Arista-7170-64C: From 7a3ef417b12c39791f6b2f0bcd43ffc1d9e7dabc Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Mon, 28 Oct 2019 18:13:26 -0700 Subject: [PATCH 141/218] PFC wd warm-reboot: Ensure default pfc wd config is used for warm-reboot test (#1179) * Ensure default pfc wd config is used for warm-reboot test Signed-off-by: Wenda Ni * Address comment: change default_pfcwd_status to enable to start default pfc wd config Signed-off-by: Wenda Ni --- ansible/roles/test/tasks/pfc_wd.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/roles/test/tasks/pfc_wd.yml b/ansible/roles/test/tasks/pfc_wd.yml index 896a8e6a0bc..f13afc41807 100644 --- a/ansible/roles/test/tasks/pfc_wd.yml +++ b/ansible/roles/test/tasks/pfc_wd.yml @@ -168,6 +168,9 @@ when: warm_reboot_test | bool == false - block: + - name: Set to default PFC WD config for warm-reboot test + shell: bash -c 'redis-cli -n 4 hset "DEVICE_METADATA|localhost" default_pfcwd_status enable; sudo pfcwd stop; sleep 5; sudo pfcwd start_default' + - name: Test PFC WD function against warm reboot include: roles/test/tasks/pfc_wd/functional_test/functional_test_warm_reboot.yml when: warm_reboot_test | bool == true From e02364f3edfba4249e27b3a233fe9c25549e2786 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Mon, 28 Oct 2019 16:42:08 -0700 Subject: [PATCH 142/218] Support two hw platforms in the ferret.py (#1183) --- ansible/roles/test/files/helpers/ferret.py | 17 +++++++++++++++-- 1 file changed, 
15 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/helpers/ferret.py b/ansible/roles/test/files/helpers/ferret.py index 2087a1238e6..be4dcbb3b5f 100644 --- a/ansible/roles/test/files/helpers/ferret.py +++ b/ansible/roles/test/files/helpers/ferret.py @@ -170,16 +170,29 @@ def action(self, interface): ext_eth_type = data[0x0c:0x0e] if ext_eth_type != binascii.unhexlify('0800'): print "Not 0x800 eth type" + self.hexdump(data) + print return src_ip = data[0x001a:0x001e] dst_ip = data[0x1e:0x22] gre_flags = data[0x22:0x24] gre_type = data[0x24:0x26] - # FIXME: check gre type and gre_flags - arp_request = data[0x26:] + gre_type_r = struct.unpack('!H', gre_type)[0] + if gre_type_r == 0x88be: # Broadcom + arp_request = data[0x26:] + elif gre_type_r == 0x8849: # Mellanox + arp_request = data[0x3c:] + else: + print "GRE type 0x%x is not supported" % gre_type_r + self.hexdump(data) + print + return + if len(arp_request) > self.ARP_PKT_LEN: print "Too long packet" + self.hexdump(data) + print return remote_mac, remote_ip, request_ip, op_type = self.extract_arp_info(arp_request) From fc2f88c8322f8a09bf831d6065c082c6409b5274 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Thu, 31 Oct 2019 12:50:07 -0700 Subject: [PATCH 143/218] PFC storm stop on arista fanout: use if else semantics to prevent command trailing (#1188) * Leave a newline to prevent 'exit' trailing the preceding command Signed-off-by: Wenda Ni * Address comment: use if else semantics Signed-off-by: Wenda Ni --- ansible/roles/test/templates/pfc_storm_stop_arista.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 index cb68b2723d6..63dfbcc8ff4 100644 --- a/ansible/roles/test/templates/pfc_storm_stop_arista.j2 +++ b/ansible/roles/test/templates/pfc_storm_stop_arista.j2 @@ -1,5 +1,5 @@ bash cd /mnt/flash -{% if pfc_storm_stop_defer_time is defined %} sleep 
{{pfc_storm_stop_defer_time}} &&{% endif %} sudo pkill -f "sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" {% if pfc_storm_stop_defer_time is defined %}&{% endif %} +{% if pfc_storm_stop_defer_time is defined %} sleep {{pfc_storm_stop_defer_time}} &&{% endif %} sudo pkill -f "sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("Ethernet", "et") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" {{'&' if pfc_storm_stop_defer_time is defined else ''}} exit exit From a9b44c34055133ac5913708699a3cbefcc9065fb Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Thu, 31 Oct 2019 18:22:29 -0700 Subject: [PATCH 144/218] Fix typo in ferret.py (#1192) --- ansible/roles/test/files/helpers/ferret.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/files/helpers/ferret.py b/ansible/roles/test/files/helpers/ferret.py index be4dcbb3b5f..19e0cccf362 100644 --- a/ansible/roles/test/files/helpers/ferret.py +++ b/ansible/roles/test/files/helpers/ferret.py @@ -181,7 +181,7 @@ def action(self, interface): gre_type_r = struct.unpack('!H', gre_type)[0] if gre_type_r == 0x88be: # Broadcom arp_request = data[0x26:] - elif gre_type_r == 0x8849: # Mellanox + elif gre_type_r == 0x8949: # Mellanox arp_request = data[0x3c:] else: print "GRE type 0x%x is not supported" % gre_type_r From a3f092f57c70799cc8d4961aa525c73e6d20780e Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Mon, 4 Nov 2019 17:27:56 +0200 Subject: [PATCH 145/218] [mlnx][fanout] simplify mlnx pfc storm on fanout functionality (#1195) Signed-off-by: Stepan Blyschak --- ansible/plugins/connection/switch.py | 4 ++++ .../roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml | 4 ++-- 
.../fanout/tasks/mlnx/deploy_pfcwd_fanout.yml | 2 +- .../fanout/templates/mlnx_deploy_pfcwd_fanout.j2 | 5 ++--- .../test/files/mlnx/docker-tests-pfcgen/start.sh | 2 +- .../files/mlnx/docker-tests-saveargs/Dockerfile | 3 --- .../test/files/mlnx/docker-tests-saveargs/Makefile | 7 ------- .../files/mlnx/docker-tests-saveargs/save_args.sh | 3 --- ansible/roles/test/templates/pfc_storm_mlnx.j2 | 14 ++++++++------ .../roles/test/templates/pfc_storm_stop_mlnx.j2 | 12 +++++++++--- 10 files changed, 27 insertions(+), 29 deletions(-) delete mode 100644 ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile delete mode 100644 ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile delete mode 100755 ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh diff --git a/ansible/plugins/connection/switch.py b/ansible/plugins/connection/switch.py index 1e53641210b..e09408e8761 100644 --- a/ansible/plugins/connection/switch.py +++ b/ansible/plugins/connection/switch.py @@ -204,6 +204,10 @@ def exec_command(self, *args, **kwargs): elif self.sku == 'eos': prompts = ['\$ '] + if self.sku in ('mlnx_os',): + # extend with default \u@\h:\w# for docker container prompts + prompts.extend(['%s@.*:.*#' % 'root']) + prompts.append(pexpect.EOF) stdout = "" diff --git a/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml index 56ebefadb9e..c168a5dd12c 100644 --- a/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml +++ b/ansible/roles/fanout/tasks/mlnx/check_pfcwd_fanout.yml @@ -12,8 +12,8 @@ login: "{{ switch_login['MLNX-OS'] }}" - set_fact: - dockers_running: "{{output.stdout|search(\"args *storm_args\")|bool}}" - dockers_installed: "{{output.stdout|search(\"pfc_storm\") and output.stdout|search(\"storm_args\")|bool}}" + dockers_installed: "{{output.stdout|search(\"pfc_storm\")}}" + dockers_running: "{{output.stdout|search(\"storm\")|bool}}" - debug: msg: "Dockers installed{{':'}} {{dockers_installed}}" diff 
--git a/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml index ef1d47ff825..caedf235adf 100644 --- a/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml +++ b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml @@ -12,7 +12,7 @@ fanout_addr: "{{device_info['mgmtip']}}" ansible_ssh_user: "{{fanout_root_user}}" ansible_ssh_pass: "{{fanout_root_pass}}" - pfcwd_dockers: "['roles/test/files/mlnx/docker-tests-pfcgen/pfc_storm.tgz', 'roles/test/files/mlnx/docker-tests-saveargs/storm_args.tgz']" + pfcwd_dockers: "['roles/test/files/mlnx/docker-tests-pfcgen/pfc_storm.tgz']" fanout_img_path: "/var/opt/tms/images/" - name: Build containers to save storm arguments and to run storm diff --git a/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 b/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 index be7e0675f1e..e15d3cbe441 100644 --- a/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 +++ b/ansible/roles/fanout/templates/mlnx_deploy_pfcwd_fanout.j2 @@ -4,6 +4,5 @@ docker no shutdown ping -c 5 8.8.8.8 docker label storm docker load pfc_storm.tgz -docker load storm_args.tgz -docker start storm_args latest args init label storm privileged network sdk -docker start storm_args latest args now label storm privileged network sdk +docker start pfc_storm latest storm init label storm privileged network sdk +docker start pfc_storm latest storm now label storm privileged network sdk diff --git a/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh index 250a670d687..66aab6a250e 100755 --- a/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh +++ b/ansible/roles/test/files/mlnx/docker-tests-pfcgen/start.sh @@ -1,3 +1,3 @@ #!/bin/bash -/root/pfc_gen.py `cat /storm/args` +sleep inf diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile 
b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile deleted file mode 100644 index 9a84e876df8..00000000000 --- a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM debian:jessie - -COPY ./save_args.sh /root/ diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile b/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile deleted file mode 100644 index d6b834e804a..00000000000 --- a/ansible/roles/test/files/mlnx/docker-tests-saveargs/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -all: save - -build: Dockerfile - docker build -t storm_args . - -save: build - docker save storm_args:latest | gzip >storm_args.tgz diff --git a/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh b/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh deleted file mode 100755 index dab1533c029..00000000000 --- a/ansible/roles/test/files/mlnx/docker-tests-saveargs/save_args.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -echo $@ >/storm/args diff --git a/ansible/roles/test/templates/pfc_storm_mlnx.j2 b/ansible/roles/test/templates/pfc_storm_mlnx.j2 index e66587d83cf..b7054a02c5c 100644 --- a/ansible/roles/test/templates/pfc_storm_mlnx.j2 +++ b/ansible/roles/test/templates/pfc_storm_mlnx.j2 @@ -1,14 +1,16 @@ +{% set container_name = "storm" %} + enable configure terminal -docker no start storm + +docker exec {{ container_name }} /bin/bash +cd /root/ {% if (pfc_asym is defined) and (pfc_asym == True) %} -docker exec args "/root/save_args.sh -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}}" +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} & {% else %} -docker exec args "/root/save_args.sh -p 
{{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}} & {% endif %} +exit -ping -c 5 8.8.8.8 -docker start pfc_storm latest storm now label storm privileged sdk network -ping -c 5 8.8.8.8 exit exit diff --git a/ansible/roles/test/templates/pfc_storm_stop_mlnx.j2 b/ansible/roles/test/templates/pfc_storm_stop_mlnx.j2 index 73ea63520c8..5d6235f1a8c 100644 --- a/ansible/roles/test/templates/pfc_storm_stop_mlnx.j2 +++ b/ansible/roles/test/templates/pfc_storm_stop_mlnx.j2 @@ -1,7 +1,13 @@ +{% set container_name = "storm" %} enable configure terminal -docker exec storm "bash -c killall5 -9" -docker no start storm -ping -c 5 8.8.8.8 + +docker exec {{ container_name }} /bin/bash +cd /root/ + +{% if pfc_storm_stop_defer_time is defined %} sleep {{pfc_storm_stop_defer_time}} &&{% endif %} pkill -f "python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}}" {% if pfc_storm_stop_defer_time is defined %}&{% endif %} + +exit + exit exit From 07b3d024b7900aceb41db132e5f5f48bea31c0b5 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Mon, 4 Nov 2019 11:19:18 -0800 Subject: [PATCH 146/218] [sensors] use platform string to index sensor test data (#1193) Many platforms have multiple HWSKUs defined. But they share the same set of sensor definition. It makes sense for the sensor test definition also be the same. 
Signed-off-by: Ying Xie --- ansible/group_vars/sonic/sku-sensors-data.yml | 474 +++++++++--------- .../sonic-common/tasks/sensors_check.yml | 10 +- 2 files changed, 234 insertions(+), 250 deletions(-) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index f740f87c59a..3b98389d7ac 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -1,5 +1,5 @@ sensors_checks: - Force10-S6100: + x86_64-dell_s6100_c2538-r0: alarms: fan: - SMF_S6100_ON-isa-0000/Tray1 Fan1/fan1_alarm @@ -144,7 +144,7 @@ sensors_checks: psu_skips: {} - Force10-Z9100-C32: + x86_64-dell_z9100_c2538-r0: alarms: fan: - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_alarm @@ -265,135 +265,7 @@ sensors_checks: psu_skips: {} - Force10-Z9100-C8D48: - alarms: - fan: - - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_alarm - - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_fault - - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_alarm - - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_fault - - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_alarm - - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_fault - - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_alarm - - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_fault - - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_alarm - - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_fault - - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_alarm - - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_fault - - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_alarm - - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_fault - - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_alarm - - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_fault - - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_alarm - - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_fault - - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_alarm - - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_fault - - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_alarm - - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_fault - - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_alarm - - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_fault - temp: - 
- coretemp-isa-0000/Core 0/temp2_crit_alarm - - coretemp-isa-0000/Core 1/temp3_crit_alarm - - coretemp-isa-0000/Core 2/temp4_crit_alarm - - coretemp-isa-0000/Core 3/temp5_crit_alarm - power: - - SMF_Z9100_ON-isa-0000/CPU XP3R3V_EARLY/in1_alarm - - SMF_Z9100_ON-isa-0000/CPU XP5R0V_CP/in2_alarm - - SMF_Z9100_ON-isa-0000/CPU XP3R3V_STD/in3_alarm - - SMF_Z9100_ON-isa-0000/CPU XP3R3V_CP /in4_alarm - - SMF_Z9100_ON-isa-0000/CPU XP3R3V_STD/in3_alarm - - SMF_Z9100_ON-isa-0000/CPU XP3R3V_CP /in4_alarm - - SMF_Z9100_ON-isa-0000/CPU XP0R75V_VTT_A/in5_alarm - - SMF_Z9100_ON-isa-0000/CPU XP0R75V_VTT_B/in6_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R07V_CPU/in7_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU/in8_alarm - - SMF_Z9100_ON-isa-0000/CPU XP12R0V/in9_alarm - - SMF_Z9100_ON-isa-0000/CPU VDDR_CPU_2/in10_alarm - - SMF_Z9100_ON-isa-0000/CPU VDDR_CPU_1/in11_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R5V_CLK/in12_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R35V_CPU/in13_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R8V_CPU/in14_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU_VNN/in15_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R0V_CPU_VCC/in16_alarm - - SMF_Z9100_ON-isa-0000/CPU XP1R5V_EARLY/in17_alarm - - SMF_Z9100_ON-isa-0000/SW XP3R3V_MON/in19_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R8V_MON/in20_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R25V_MON/in21_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R2V_MON/in22_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R0V_SW_MON/in23_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R0V_ROV_SW_MON/in24_alarm - - SMF_Z9100_ON-isa-0000/SW XP5V_MB_MON/in25_alarm - - SMF_Z9100_ON-isa-0000/SW XP1R8V_FPGA_MON/in26_alarm - - SMF_Z9100_ON-isa-0000/SW XP3R3V_FPGA_MON/in27_alarm - - SMF_Z9100_ON-isa-0000/SW XP3R3V_EARLY_MON/in28_alarm - - compares: - temp: - - - coretemp-isa-0000/Core 0/temp2_input - - coretemp-isa-0000/Core 0/temp2_crit - - - coretemp-isa-0000/Core 1/temp3_input - - coretemp-isa-0000/Core 1/temp3_crit - - - coretemp-isa-0000/Core 2/temp4_input - - coretemp-isa-0000/Core 2/temp4_crit - - - 
coretemp-isa-0000/Core 3/temp5_input - - coretemp-isa-0000/Core 3/temp5_crit - power: - - - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input - - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_max - - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_max - - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_max - - - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input - - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_max - fan: [] - non_zero: - fan: - - SMF_Z9100_ON-isa-0000/Tray1 Fan1/fan1_input - - SMF_Z9100_ON-isa-0000/Tray1 Fan2/fan2_input - - SMF_Z9100_ON-isa-0000/Tray2 Fan1/fan3_input - - SMF_Z9100_ON-isa-0000/Tray2 Fan2/fan4_input - - SMF_Z9100_ON-isa-0000/Tray3 Fan1/fan5_input - - SMF_Z9100_ON-isa-0000/Tray3 Fan2/fan6_input - - SMF_Z9100_ON-isa-0000/Tray4 Fan1/fan7_input - - SMF_Z9100_ON-isa-0000/Tray4 Fan2/fan8_input - - SMF_Z9100_ON-isa-0000/Tray5 Fan1/fan9_input - - SMF_Z9100_ON-isa-0000/Tray5 Fan2/fan10_input - - SMF_Z9100_ON-isa-0000/Psu1 Fan/fan11_input - - SMF_Z9100_ON-isa-0000/Psu2 Fan/fan12_input - power: - - SMF_Z9100_ON-isa-0000/PSU1 Input Power/power1_input - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input - - SMF_Z9100_ON-isa-0000/PSU1 Output Power/power2_input - - SMF_Z9100_ON-isa-0000/PSU2 Input Power/power3_input - - SMF_Z9100_ON-isa-0000/PSU2 Output Power/power4_input - - SMF_Z9100_ON-isa-0000/PSU1 VIN/in29_input - - SMF_Z9100_ON-isa-0000/PSU1 VOUT/in30_input - - SMF_Z9100_ON-isa-0000/PSU2 VIN/in31_input - - SMF_Z9100_ON-isa-0000/PSU2 VOUT/in32_input - - SMF_Z9100_ON-isa-0000/XP1R0V/curr21_input - - SMF_Z9100_ON-isa-0000/XP1R0V_ROV/curr22_input - - temp: - - coretemp-isa-0000/Core 0/temp2_input - - coretemp-isa-0000/Core 1/temp3_input - - coretemp-isa-0000/Core 2/temp4_input - - coretemp-isa-0000/Core 3/temp5_input - - SMF_Z9100_ON-isa-0000/CPU On-board 
(U2900)/temp1_input - - "SMF_Z9100_ON-isa-0000/BCM Switch On-Board #1 (U44)/temp2_input" - - SMF_Z9100_ON-isa-0000/Front BCM On-Board (U4)/temp3_input - - SMF_Z9100_ON-isa-0000/Front BCM On-Board (U2)/temp4_input - - "SMF_Z9100_ON-isa-0000/BCM Switch On-Board #1 (U38)/temp6_input" - - SMF_Z9100_ON-isa-0000/Rear (U2900)/temp9_input - - SMF_Z9100_ON-isa-0000/PSU1 Temp/temp14_input - - SMF_Z9100_ON-isa-0000/PSU2 Temp/temp15_input - - psu_skips: {} - - - Force10-S6000: + x86_64-dell_s6000_s1220-r0: alarms: fan: - dni_dps460-i2c-1-58/fan1/fan1_alarm @@ -468,7 +340,7 @@ sensors_checks: - dni_dps460-i2c-1-59 - ltc4215-i2c-11-42 - Mellanox-SN2700: + x86_64-mlnx_msn2700-r0: alarms: fan: - dps460-i2c-10-59/fan1/fan1_alarm @@ -587,7 +459,7 @@ sensors_checks: skip_list: - dps460-i2c-10-59 - ACS-MSN2740: + x86_64-mlnx_msn2740-r0: alarms: fan: - dps460-i2c-4-58/fan1/fan1_alarm @@ -716,7 +588,7 @@ sensors_checks: skip_list: - dps460-i2c-4-59 - ACS-MSN2410: + x86_64-mlnx_msn2410-r0: alarms: fan: - dps460-i2c-10-59/fan1/fan1_alarm @@ -835,7 +707,7 @@ sensors_checks: skip_list: - dps460-i2c-10-59 - ACS-MSN2100: + x86_64-mlnx_msn2100-r0: alarms: fan: [] power: @@ -905,7 +777,8 @@ sensors_checks: - pmbus-i2c-5-41/pout2/power3_input temp: [] psu_skips: {} - ACS-MSN2010: + + x86_64-mlnx_msn2010-r0: alarms: fan: [] power: @@ -956,7 +829,214 @@ sensors_checks: - tps53679-i2c-5-71/pout1/power1_input temp: [] psu_skips: {} - Arista-7050-QX32: + + x86_64-mlnx_msn3700-r0: + alarms: + fan: + - dps460-i2c-4-58/PSU-1 Fan 1/fan1_alarm + - dps460-i2c-4-58/PSU-1 Fan 1/fan1_fault + + - dps460-i2c-4-59/PSU-2 Fan 1/fan1_alarm + - dps460-i2c-4-59/PSU-2 Fan 1/fan1_fault + + - mlxreg_fan-isa-0000/Chassis Fan Drawer-1 Tach 1/fan1_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-1 Tach 2/fan2_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-2 Tach 1/fan3_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-2 Tach 2/fan4_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-3 Tach 1/fan5_fault + - 
mlxreg_fan-isa-0000/Chassis Fan Drawer-3 Tach 2/fan6_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-4 Tach 1/fan7_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-4 Tach 2/fan8_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-5 Tach 1/fan9_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-5 Tach 2/fan10_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-6 Tach 1/fan11_fault + - mlxreg_fan-isa-0000/Chassis Fan Drawer-6 Tach 2/fan12_fault + power: + - tps53679-i2c-5-70/PMIC-1 ASIC 0.8V VCORE Rail Curr (out)/curr1_crit_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 0.8V VCORE Rail Curr (out)/curr1_max_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 1.2V Rail Curr (out)/curr2_crit_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 1.2V Rail Curr (out)/curr2_max_alarm + - tps53679-i2c-5-70/PMIC-1 PSU 12V Rail (in)/in1_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 0.8V VCORE Rail (out)/in2_crit_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 0.8V VCORE Rail (out)/in2_lcrit_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 1.2V Rail (out)/in3_crit_alarm + - tps53679-i2c-5-70/PMIC-1 ASIC 1.2V Rail (out)/in3_lcrit_alarm + + - tps53679-i2c-5-71/PMIC-2 ASIC 3.3V Rail Curr (out)/curr1_crit_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 3.3V Rail Curr (out)/curr1_max_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 1.8V Rail Curr (out)/curr2_crit_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 1.8V Rail Curr (out)/curr2_max_alarm + - tps53679-i2c-5-71/PMIC-2 PSU 12V Rail (in)/in1_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 3.3V Rail (out)/in2_crit_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 3.3V Rail (out)/in2_lcrit_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 1.8V Rail (out)/in3_crit_alarm + - tps53679-i2c-5-71/PMIC-2 ASIC 1.8V Rail (out)/in3_lcrit_alarm + + - tps53679-i2c-15-58/PMIC-3 COMEX 1.8V Rail Curr (out)/curr1_crit_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.8V Rail Curr (out)/curr1_max_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.05V Rail Curr (out)/curr2_crit_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.05V Rail Curr (out)/curr2_max_alarm + - 
tps53679-i2c-15-58/PMIC-3 PSU 12V Rail (in)/in1_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.8V Rail (out)/in2_crit_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.8V Rail (out)/in2_lcrit_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.05V Rail (out)/in3_crit_alarm + - tps53679-i2c-15-58/PMIC-3 COMEX 1.05V Rail (out)/in3_lcrit_alarm + + - tps53679-i2c-15-61/PMIC-4 COMEX 1.2V Rail Curr (out)/curr1_crit_alarm + - tps53679-i2c-15-61/PMIC-4 COMEX 1.2V Rail Curr (out)/curr1_max_alarm + - tps53679-i2c-15-61/PMIC-4 PSU 12V Rail (in)/in1_alarm + - tps53679-i2c-15-61/PMIC-4 COMEX 1.2V Rail (out)/in2_crit_alarm + - tps53679-i2c-15-61/PMIC-4 COMEX 1.2V Rail (out)/in2_lcrit_alarm + + - dps460-i2c-4-58/PSU-1 220V Rail Curr (in)/curr1_crit_alarm + - dps460-i2c-4-58/PSU-1 220V Rail Curr (in)/curr1_max_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Curr (out)/curr2_crit_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Curr (out)/curr2_lcrit_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Curr (out)/curr2_max_alarm + - dps460-i2c-4-58/PSU-1 220V Rail Pwr (in)/power1_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Pwr (out)/power2_cap_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Pwr (out)/power2_crit_alarm + - dps460-i2c-4-58/PSU-1 12V Rail Pwr (out)/power2_max_alarm + - dps460-i2c-4-58/PSU-1 220V Rail (in)/in1_crit_alarm + - dps460-i2c-4-58/PSU-1 220V Rail (in)/in1_lcrit_alarm + - dps460-i2c-4-58/PSU-1 220V Rail (in)/in1_max_alarm + - dps460-i2c-4-58/PSU-1 220V Rail (in)/in1_min_alarm + - dps460-i2c-4-58/PSU-1 12V Rail (out)/in3_crit_alarm + - dps460-i2c-4-58/PSU-1 12V Rail (out)/in3_lcrit_alarm + - dps460-i2c-4-58/PSU-1 12V Rail (out)/in3_max_alarm + - dps460-i2c-4-58/PSU-1 12V Rail (out)/in3_min_alarm + + - dps460-i2c-4-59/PSU-2 220V Rail Curr (in)/curr1_crit_alarm + - dps460-i2c-4-59/PSU-2 220V Rail Curr (in)/curr1_max_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Curr (out)/curr2_crit_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Curr (out)/curr2_lcrit_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Curr (out)/curr2_max_alarm + - 
dps460-i2c-4-59/PSU-2 220V Rail Pwr (in)/power1_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Pwr (out)/power2_cap_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Pwr (out)/power2_crit_alarm + - dps460-i2c-4-59/PSU-2 12V Rail Pwr (out)/power2_max_alarm + - dps460-i2c-4-59/PSU-2 220V Rail (in)/in1_crit_alarm + - dps460-i2c-4-59/PSU-2 220V Rail (in)/in1_lcrit_alarm + - dps460-i2c-4-59/PSU-2 220V Rail (in)/in1_max_alarm + - dps460-i2c-4-59/PSU-2 220V Rail (in)/in1_min_alarm + - dps460-i2c-4-59/PSU-2 12V Rail (out)/in3_crit_alarm + - dps460-i2c-4-59/PSU-2 12V Rail (out)/in3_lcrit_alarm + - dps460-i2c-4-59/PSU-2 12V Rail (out)/in3_max_alarm + - dps460-i2c-4-59/PSU-2 12V Rail (out)/in3_min_alarm + temp: + - coretemp-isa-0000/Physical id 0/temp1_crit_alarm + - coretemp-isa-0000/Core 0/temp2_crit_alarm + - coretemp-isa-0000/Core 1/temp3_crit_alarm + - coretemp-isa-0000/Core 2/temp4_crit_alarm + - coretemp-isa-0000/Core 3/temp5_crit_alarm + + - mlxsw-i2c-2-48/front panel 001/temp2_fault + - mlxsw-i2c-2-48/front panel 002/temp3_fault + - mlxsw-i2c-2-48/front panel 003/temp4_fault + - mlxsw-i2c-2-48/front panel 004/temp5_fault + - mlxsw-i2c-2-48/front panel 005/temp6_fault + - mlxsw-i2c-2-48/front panel 006/temp7_fault + - mlxsw-i2c-2-48/front panel 007/temp8_fault + - mlxsw-i2c-2-48/front panel 008/temp9_fault + - mlxsw-i2c-2-48/front panel 009/temp10_fault + - mlxsw-i2c-2-48/front panel 010/temp11_fault + - mlxsw-i2c-2-48/front panel 011/temp12_fault + - mlxsw-i2c-2-48/front panel 012/temp13_fault + - mlxsw-i2c-2-48/front panel 013/temp14_fault + - mlxsw-i2c-2-48/front panel 014/temp15_fault + - mlxsw-i2c-2-48/front panel 015/temp16_fault + - mlxsw-i2c-2-48/front panel 016/temp17_fault + - mlxsw-i2c-2-48/front panel 017/temp18_fault + - mlxsw-i2c-2-48/front panel 018/temp19_fault + - mlxsw-i2c-2-48/front panel 019/temp20_fault + - mlxsw-i2c-2-48/front panel 020/temp21_fault + - mlxsw-i2c-2-48/front panel 021/temp22_fault + - mlxsw-i2c-2-48/front panel 022/temp23_fault + - 
mlxsw-i2c-2-48/front panel 023/temp24_fault + - mlxsw-i2c-2-48/front panel 024/temp25_fault + - mlxsw-i2c-2-48/front panel 025/temp26_fault + - mlxsw-i2c-2-48/front panel 026/temp27_fault + - mlxsw-i2c-2-48/front panel 027/temp28_fault + - mlxsw-i2c-2-48/front panel 028/temp29_fault + - mlxsw-i2c-2-48/front panel 029/temp30_fault + - mlxsw-i2c-2-48/front panel 030/temp31_fault + - mlxsw-i2c-2-48/front panel 031/temp32_fault + - mlxsw-i2c-2-48/front panel 032/temp33_fault + + - tps53679-i2c-5-70/PMIC-1 Temp 1/temp1_crit_alarm + - tps53679-i2c-5-70/PMIC-1 Temp 1/temp1_max_alarm + - tps53679-i2c-5-70/PMIC-1 Temp 2/temp2_crit_alarm + - tps53679-i2c-5-70/PMIC-1 Temp 2/temp2_max_alarm + + - tps53679-i2c-5-71/PMIC-2 Temp 1/temp1_crit_alarm + - tps53679-i2c-5-71/PMIC-2 Temp 1/temp1_max_alarm + - tps53679-i2c-5-71/PMIC-2 Temp 2/temp2_crit_alarm + - tps53679-i2c-5-71/PMIC-2 Temp 2/temp2_max_alarm + + - tps53679-i2c-15-58/PMIC-3 Temp 1/temp1_crit_alarm + - tps53679-i2c-15-58/PMIC-3 Temp 1/temp1_max_alarm + - tps53679-i2c-15-58/PMIC-3 Temp 2/temp2_crit_alarm + - tps53679-i2c-15-58/PMIC-3 Temp 2/temp2_max_alarm + + - tps53679-i2c-15-61/PMIC-4 Temp 1/temp1_crit_alarm + - tps53679-i2c-15-61/PMIC-4 Temp 1/temp1_max_alarm + - tps53679-i2c-15-61/PMIC-4 Temp 2/temp2_crit_alarm + - tps53679-i2c-15-61/PMIC-4 Temp 2/temp2_max_alarm + + - dps460-i2c-4-58/PSU-1 Temp 1/temp1_crit_alarm + - dps460-i2c-4-58/PSU-1 Temp 1/temp1_lcrit_alarm + - dps460-i2c-4-58/PSU-1 Temp 1/temp1_max_alarm + - dps460-i2c-4-58/PSU-1 Temp 1/temp1_min_alarm + - dps460-i2c-4-58/PSU-1 Temp 2/temp2_crit_alarm + - dps460-i2c-4-58/PSU-1 Temp 2/temp2_lcrit_alarm + - dps460-i2c-4-58/PSU-1 Temp 2/temp2_max_alarm + - dps460-i2c-4-58/PSU-1 Temp 2/temp2_min_alarm + - dps460-i2c-4-58/PSU-1 Temp 3/temp3_crit_alarm + - dps460-i2c-4-58/PSU-1 Temp 3/temp3_lcrit_alarm + - dps460-i2c-4-58/PSU-1 Temp 3/temp3_max_alarm + - dps460-i2c-4-58/PSU-1 Temp 3/temp3_min_alarm + + - dps460-i2c-4-59/PSU-2 Temp 1/temp1_crit_alarm + - 
dps460-i2c-4-59/PSU-2 Temp 1/temp1_lcrit_alarm + - dps460-i2c-4-59/PSU-2 Temp 1/temp1_max_alarm + - dps460-i2c-4-59/PSU-2 Temp 1/temp1_min_alarm + - dps460-i2c-4-59/PSU-2 Temp 2/temp2_crit_alarm + - dps460-i2c-4-59/PSU-2 Temp 2/temp2_lcrit_alarm + - dps460-i2c-4-59/PSU-2 Temp 2/temp2_max_alarm + - dps460-i2c-4-59/PSU-2 Temp 2/temp2_min_alarm + - dps460-i2c-4-59/PSU-2 Temp 3/temp3_crit_alarm + - dps460-i2c-4-59/PSU-2 Temp 3/temp3_lcrit_alarm + - dps460-i2c-4-59/PSU-2 Temp 3/temp3_max_alarm + - dps460-i2c-4-59/PSU-2 Temp 3/temp3_min_alarm + compares: + power: [] + temp: + - - coretemp-isa-0000/Physical id 0/temp1_input + - coretemp-isa-0000/Physical id 0/temp1_max + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_max + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_max + - - coretemp-isa-0000/Core 2/temp4_input + - coretemp-isa-0000/Core 2/temp4_max + - - coretemp-isa-0000/Core 3/temp5_input + - coretemp-isa-0000/Core 3/temp5_max + + - - tmp102-i2c-7-49/Ambient Fan Side Temp (air intake)/temp1_input + - tmp102-i2c-7-49/Ambient Fan Side Temp (air intake)/temp1_max + + - - tmp102-i2c-7-4a/Ambient Port Side Temp (air exhaust)/temp1_input + - tmp102-i2c-7-4a/Ambient Port Side Temp (air exhaust)/temp1_max + + - - tmp102-i2c-15-49/Ambient COMEX Temp/temp1_input + - tmp102-i2c-15-49/Ambient COMEX Temp/temp1_max + non_zero: + fan: [] + power: [] + temp: [] + psu_skips: {} + + x86_64-arista_7050_qx32: alarms: fan: [] power: @@ -1009,7 +1089,7 @@ sensors_checks: psu_skips: {} - Arista-7260CX3-D108C8: + x86_64-arista_7260cx3_64: alarms: fan: - pmbus-i2c-3-58/fan1/fan1_alarm @@ -1098,7 +1178,7 @@ sensors_checks: psu_skips: {} - INGRASYS-S9100-C32: + x86_64-ingrasys_s9100-r0: alarms: fan: - w83795adg-i2c-0-2f/FANTRAY 1-A/fan1_alarm @@ -1148,7 +1228,7 @@ sensors_checks: temp: [] psu_skips: {} - INGRASYS-S8900-54XC: + x86_64-ingrasys_s8900_54xc-r0: alarms: fan: - w83795adg-i2c-0-2f/FANTRAY 1-A/fan1_alarm @@ -1198,7 +1278,7 @@ 
sensors_checks: temp: [] psu_skips: {} - INGRASYS-S8900-64XC: + x86_64-ingrasys_s8900_64xc-r0: alarms: fan: - w83795adg-i2c-0-2f/FANTRAY 1-A/fan1_alarm @@ -1256,7 +1336,7 @@ sensors_checks: temp: [] psu_skips: {} - INGRASYS-S8810-32Q: + x86_64-ingrasys_s8810_32q-r0: alarms: fan: - w83795adg-i2c-0-2f/FANTRAY 1-A/fan1_alarm @@ -1309,7 +1389,7 @@ sensors_checks: temp: [] psu_skips: {} - INGRASYS-S9130-32X: + x86_64-ingrasys_s9130_32x-r0: alarms: fan: - w83795adg-i2c-8-2f/FANTRAY 1-A/fan1_alarm @@ -1360,7 +1440,7 @@ sensors_checks: temp: [] psu_skips: {} - Arista-7060CX-32S-C32: + x86_64-arista_7060_cx32s: alarms: fan: - pmbus-i2c-5-58/fan1/fan1_alarm @@ -1429,7 +1509,7 @@ sensors_checks: psu_skips: {} - Accton-AS7712-32X: + x86_64-accton_as7712_32x-r0: alarms: fan: - as7712_32x_fan-i2c-2-66/fan1/fan1_fault @@ -1502,7 +1582,7 @@ sensors_checks: skip_list: - ym2651-i2c-11-5b - Seastone-DX010: + x86_64-cel_seastone-r0: alarms: fan: - dps460-i2c-10-5a/fan1/fan1_alarm @@ -1681,7 +1761,7 @@ sensors_checks: - lm75b-i2c-7-4a/ASIC temp sensor/temp1_input psu_skips: {} - Arista-7170-64C: + x86_64-arista_7170_64c: alarms: fan: - dps1900-i2c-6-58/fan1/fan1_alarm @@ -1761,103 +1841,7 @@ sensors_checks: psu_skips: {} - Arista-7260CX3-Q64: - alarms: - fan: - - pmbus-i2c-3-58/fan1/fan1_alarm - - pmbus-i2c-4-58/fan1/fan1_alarm - - pmbus-i2c-3-58/fan1/fan1_fault - - pmbus-i2c-4-58/fan1/fan1_fault - - la_cpld-i2c-85-60/fan1/fan1_fault - - la_cpld-i2c-85-60/fan2/fan2_fault - - la_cpld-i2c-85-60/fan3/fan3_fault - - la_cpld-i2c-85-60/fan4/fan4_fault - power: - - pmbus-i2c-3-58/iin/curr1_max_alarm - - pmbus-i2c-3-58/iout1/curr2_max_alarm - - pmbus-i2c-3-58/iout1/curr2_crit_alarm - - pmbus-i2c-3-58/iout2/curr3_crit_alarm - - pmbus-i2c-3-58/vin/in1_alarm - - pmbus-i2c-3-58/vout1/in2_lcrit_alarm - - pmbus-i2c-3-58/vout1/in2_crit_alarm - - pmbus-i2c-4-58/iin/curr1_max_alarm - - pmbus-i2c-4-58/iout1/curr2_max_alarm - - pmbus-i2c-4-58/iout1/curr2_crit_alarm - - 
pmbus-i2c-4-58/iout2/curr3_crit_alarm - - pmbus-i2c-4-58/vin/in1_alarm - - pmbus-i2c-4-58/vout1/in2_lcrit_alarm - - pmbus-i2c-4-58/vout1/in2_crit_alarm - temp: - - coretemp-isa-0000/Physical id 0/temp1_crit_alarm - - coretemp-isa-0000/Core 0/temp2_crit_alarm - - coretemp-isa-0000/Core 1/temp3_crit_alarm - - lm73-i2c-88-48/Front panel temp sensor/temp1_min_alarm - - lm73-i2c-88-48/Front panel temp sensor/temp1_max_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_min_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_max_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_min_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_min_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_fault - - pmbus-i2c-3-58/Power supply 1 exhaust temp sensor/temp3_alarm - - pmbus-i2c-3-58/Power supply 1 inlet temp sensor/temp2_alarm - - pmbus-i2c-3-58/Power supply 1 hotspot sensor/temp1_alarm - - pmbus-i2c-4-58/Power supply 2 exhaust temp sensor/temp3_alarm - - pmbus-i2c-4-58/Power supply 2 inlet temp sensor/temp2_alarm - - pmbus-i2c-4-58/Power supply 2 hotspot sensor/temp1_alarm - - compares: - fan: [] - power: - - - pmbus-i2c-3-58/iin/curr1_input - - pmbus-i2c-3-58/iin/curr1_max - - - pmbus-i2c-3-58/iout1/curr2_input - - pmbus-i2c-3-58/iout1/curr2_max - - - pmbus-i2c-4-58/iin/curr1_input - - pmbus-i2c-4-58/iin/curr1_max - - - pmbus-i2c-4-58/iout1/curr2_input - - pmbus-i2c-4-58/iout1/curr2_max - temp: - - - coretemp-isa-0000/Physical id 0/temp1_input - - coretemp-isa-0000/Physical id 0/temp1_max - - - coretemp-isa-0000/Core 0/temp2_input - - coretemp-isa-0000/Core 0/temp2_max - - - coretemp-isa-0000/Core 1/temp3_input - - coretemp-isa-0000/Core 1/temp3_max - - 
- lm73-i2c-88-48/Front panel temp sensor/temp1_input - - lm73-i2c-88-48/Front panel temp sensor/temp1_max - - - max6658-i2c-1-4c/Asic temp sensor/temp1_input - - max6658-i2c-1-4c/Asic temp sensor/temp1_max - - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_input - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max - - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_input - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max - - non_zero: - fan: - - pmbus-i2c-3-58/fan1/fan1_input - - pmbus-i2c-4-58/fan1/fan1_input - - la_cpld-i2c-85-60/fan1/fan1_input - - la_cpld-i2c-85-60/fan2/fan2_input - - la_cpld-i2c-85-60/fan3/fan3_input - - la_cpld-i2c-85-60/fan4/fan4_input - power: - - pmbus-i2c-4-58/pin/power1_input - - pmbus-i2c-4-58/pout1/power2_input - - pmbus-i2c-4-58/pout2/power3_input - - pmbus-i2c-3-58/pin/power1_input - - pmbus-i2c-3-58/pout1/power2_input - - pmbus-i2c-3-58/pout2/power3_input - temp: - - pch_haswell-virtual-0/temp1/temp1_input - - psu_skips: {} - - Celestica-E1031-T48S4: + x86_64-cel_e1031-r0: alarms: fan: [] power: [] @@ -1882,7 +1866,7 @@ sensors_checks: temp: [] psu_skips: {} - Arista-7050-QX-32S: + x86_64-arista_7050_qx32s: alarms: fan: - pmbus-i2c-5-58/fan1/fan1_alarm diff --git a/ansible/roles/sonic-common/tasks/sensors_check.yml b/ansible/roles/sonic-common/tasks/sensors_check.yml index 8e99b77548c..1a915d9e387 100644 --- a/ansible/roles/sonic-common/tasks/sensors_check.yml +++ b/ansible/roles/sonic-common/tasks/sensors_check.yml @@ -2,15 +2,15 @@ shell: docker ps -a --format '{{'{{'}}.Image{{'}} {{'}}.Names{{'}}'}}' | grep 'platform' | awk '{print $2}' register: pmon_ps +- name: Get platform name + shell: show platform summary | grep Platform | awk '{print $2}' + register: platform + - set_fact: ansible_python_interpreter: "docker exec -i {{ pmon_ps.stdout }} python" -- set_fact: - minigraph_hwsku: "Mellanox-SN2700" - when: minigraph_hwsku == 'ACS-MSN2700' - - name: Gather sensors - sensors_facts: checks={{ 
sensors_checks[minigraph_hwsku] }} + sensors_facts: checks={{ sensors_checks[platform.stdout] }} vars: ansible_shell_type: docker From 63911fc121aab9f5e166612c193918f465f6e079 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Wed, 13 Nov 2019 21:26:45 +0200 Subject: [PATCH 147/218] [pfcwd] ignore error log about deprecated API (mellanox) (#1209) --- .../test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages index 7cff6813972..00a801add90 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages @@ -9,3 +9,4 @@ r, ".* SAI_STATUS_BUFFER_OVERFLOW" r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" r, ".* ERR syncd.*" r, ".* syncd .* ERROR +HOST_INTERFACE" +r, ".* syncd .* ERROR SX_API_ROUTER: uc_route_data_p->next_hop_list_p: This Parameter is deprecated and will be removed in the future.*" \ No newline at end of file From a7f4a4e9af7a638fdf0184738248d575e17e2c2f Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Tue, 19 Nov 2019 18:00:54 +0800 Subject: [PATCH 148/218] [test_sfp.py]Ignore the error logs for known issue in SFP reset & LPM test (#1205) * [test_sfp.py]Fix the issue that error logs of reading eeprom found during test_sfp running by ignore the log The log is expected. 
* only ignore the error message on Mellanox platform --- tests/platform/test_sfp.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py index 7de20af0042..d6b057daff7 100644 --- a/tests/platform/test_sfp.py +++ b/tests/platform/test_sfp.py @@ -13,7 +13,11 @@ import pytest from platform_fixtures import conn_graph_facts +from loganalyzer import LogAnalyzer +pytestmark = [ + pytest.mark.disable_loganalyzer # disable automatic loganalyzer +] def parse_output(output_lines): """ @@ -56,9 +60,15 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): * show interface transceiver eeprom * sfputil reset """ - ans_host = testbed_devices["dut"] + if ans_host.facts["asic_type"] in ["mellanox"]: + loganalyzer = LogAnalyzer(ansible_host=ans_host, marker_prefix='sfp_cfg') + loganalyzer.load_common_config() + + loganalyzer.ignore_regex.append("kernel.*Eeprom query failed*") + marker = loganalyzer.init() + cmd_sfp_presence = "sudo sfputil show presence" cmd_sfp_eeprom = "sudo sfputil show eeprom" cmd_sfp_reset = "sudo sfputil reset" @@ -114,6 +124,9 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) + if ans_host.facts["asic_type"] in ["mellanox"]: + loganalyzer.analyze(marker) + def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): """ @@ -126,6 +139,13 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): """ ans_host = testbed_devices["dut"] + if ans_host.facts["asic_type"] in ["mellanox"]: + loganalyzer = LogAnalyzer(ansible_host=ans_host, marker_prefix='sfp_lpm') + loganalyzer.load_common_config() + + loganalyzer.ignore_regex.append("Eeprom query failed") + marker = loganalyzer.init() + cmd_sfp_presence = "sudo sfputil show presence" 
cmd_sfp_show_lpmode = "sudo sfputil show lpmode" cmd_sfp_set_lpmode = "sudo sfputil lpmode" @@ -178,3 +198,6 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) + + if ans_host.facts["asic_type"] in ["mellanox"]: + loganalyzer.analyze(marker) From cfbeafd89ebc06fff3cbb4920997b3b94c0e6767 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Sat, 9 Nov 2019 09:14:10 +0200 Subject: [PATCH 149/218] [tests] add sym link to ansible.cfg for pytest-tests (#1172) Need to make pytest-ansible to use ansible.cfg, otherwise if hosts are not in known_hosts we will get the following error: 16:02:49 " def _run(self, *module_args, **complex_args):", 16:02:49 " module_ignore_errors = complex_args.pop('module_ignore_errors', False)", 16:02:49 " module_async = complex_args.pop('module_async', False)", 16:02:49 " ", 16:02:49 " if module_async:", 16:02:49 " def run_module(module_args, complex_args):", 16:02:49 " return self.module(*module_args, **complex_args)[self.hostname]", 16:02:49 " pool = ThreadPool()", 16:02:49 " result = pool.apply_async(run_module, (module_args, complex_args))", 16:02:49 " return pool, result", 16:02:49 " ", 16:02:49 " res = self.module(*module_args, **complex_args)[self.hostname]", 16:02:49 " if res.is_failed and not module_ignore_errors:", 16:02:49 "> raise RunAnsibleModuleFail(\"run module {} failed, errmsg {}\".format(self.module_name, res))", 16:02:49 "E RunAnsibleModuleFail: run module command failed, errmsg {'msg': u\"ERROR! Using a SSH password instead of a key is not possible because Host Key checking is enabled and sshpass does not support this. 
Please add this host's fingerprint to your known_hosts file to manage this host.\", 'failed': True}", 16:02:49 "", Signed-off-by: Stepan Blyschak --- tests/ansible.cfg | 1 + 1 file changed, 1 insertion(+) create mode 120000 tests/ansible.cfg diff --git a/tests/ansible.cfg b/tests/ansible.cfg new file mode 120000 index 00000000000..0b986ffbd15 --- /dev/null +++ b/tests/ansible.cfg @@ -0,0 +1 @@ +../ansible/ansible.cfg \ No newline at end of file From 9416328ebbb7594dec8bdfdfbba6a560693f2dd5 Mon Sep 17 00:00:00 2001 From: Wenda Ni Date: Mon, 11 Nov 2019 11:30:07 -0800 Subject: [PATCH 150/218] Add port map for Q64 hwsku (#1199) Signed-off-by: Wenda Ni --- .../files/brcm/66_interface_to_front_map.ini | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 ansible/roles/test/files/brcm/66_interface_to_front_map.ini diff --git a/ansible/roles/test/files/brcm/66_interface_to_front_map.ini b/ansible/roles/test/files/brcm/66_interface_to_front_map.ini new file mode 100644 index 00000000000..1e3cc385c33 --- /dev/null +++ b/ansible/roles/test/files/brcm/66_interface_to_front_map.ini @@ -0,0 +1,67 @@ +# ptf host interface @ switch front port name +0@Ethernet0 +1@Ethernet4 +2@Ethernet8 +3@Ethernet12 +4@Ethernet16 +5@Ethernet20 +6@Ethernet24 +7@Ethernet28 +8@Ethernet32 +9@Ethernet36 +10@Ethernet40 +11@Ethernet44 +12@Ethernet48 +13@Ethernet52 +14@Ethernet56 +15@Ethernet60 +16@Ethernet64 +17@Ethernet68 +18@Ethernet72 +19@Ethernet76 +20@Ethernet80 +21@Ethernet84 +22@Ethernet88 +23@Ethernet92 +24@Ethernet96 +25@Ethernet100 +26@Ethernet104 +27@Ethernet108 +28@Ethernet112 +29@Ethernet116 +30@Ethernet120 +31@Ethernet124 +32@Ethernet128 +33@Ethernet132 +34@Ethernet136 +35@Ethernet140 +36@Ethernet144 +37@Ethernet148 +38@Ethernet152 +39@Ethernet156 +40@Ethernet160 +41@Ethernet164 +42@Ethernet168 +43@Ethernet172 +44@Ethernet176 +45@Ethernet180 +46@Ethernet184 +47@Ethernet188 +48@Ethernet192 +49@Ethernet196 +50@Ethernet200 +51@Ethernet204 +52@Ethernet208 +53@Ethernet212 
+54@Ethernet216 +55@Ethernet220 +56@Ethernet224 +57@Ethernet228 +58@Ethernet232 +59@Ethernet236 +60@Ethernet240 +61@Ethernet244 +62@Ethernet248 +63@Ethernet252 +64@Ethernet256 +65@Ethernet260 From 349004355e4c344f52f62d91036240af5a63b4b4 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Sun, 1 Dec 2019 12:38:38 -0800 Subject: [PATCH 151/218] Merge pull request #1219 from renukamanavalan/clet (#1236) configlet support extended to testbed. --- ansible/config_sonic_basedon_testbed.yml | 11 + ansible/library/test_facts.py | 5 +- ansible/minigraph/switch-t1-64-lag-clet.xml | 2349 +++++++++++++++++ .../test/files/ptftests/IP_decap_test.py | 2 +- ansible/roles/test/files/ptftests/dip_sip.py | 4 +- ansible/roles/test/files/ptftests/fib_test.py | 2 +- ansible/roles/test/files/ptftests/mtu_test.py | 2 +- ansible/roles/test/tasks/bgp_gr_helper.yml | 2 +- .../test/tasks/bgp_gr_helper/get_vm_info.yml | 2 +- .../roles/test/tasks/bgp_multipath_relax.yml | 4 +- ansible/roles/test/tasks/decap.yml | 4 +- ansible/roles/test/tasks/dip_sip.yml | 4 +- .../test/tasks/everflow_testbed/run_test.yml | 2 +- ansible/roles/test/tasks/mtu.yml | 2 +- ansible/roles/test/tasks/shared-fib.yml | 4 +- ansible/roles/test/templates/fib.j2 | 4 +- ansible/roles/test/vars/testcases.yml | 60 +- ansible/testbed-new.yaml | 2 +- .../configlet/t1-64-lag-clet/apply_clet.sh | 6 + .../t1-64-lag-clet/clet-add_20T0.json | 221 ++ .../t1-64-lag-clet/clet-to_clear.json | 9 + ansible/vars/topo_t1-64-lag-clet.yml | 683 +++++ ansible/veos | 2 +- ansible/veos.vtb | 2 +- tests/veos.vtb | 2 +- 25 files changed, 3335 insertions(+), 55 deletions(-) create mode 100644 ansible/minigraph/switch-t1-64-lag-clet.xml create mode 100755 ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh create mode 100644 ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json create mode 100644 ansible/vars/configlet/t1-64-lag-clet/clet-to_clear.json create mode 100644 
ansible/vars/topo_t1-64-lag-clet.yml diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index d6dcb245b5b..f07c64aa8f5 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -114,6 +114,12 @@ dest=/etc/sonic/minigraph.xml become: true + - name: Copy corresponding configlet files if apply_configlet=true + copy: src=vars/configlet/{{ topo }}/ + dest=/etc/sonic/ + become: true + when: apply_configlet is defined and apply_configlet|bool == true + - name: disable automatic minigraph update if we are deploying new minigraph into SONiC lineinfile: name: /etc/sonic/updategraph.conf @@ -135,6 +141,11 @@ become: true shell: config bgp startup all + - name: execute configlet application script, which applies configlets in strict order. + become: true + shell: bash -c "/etc/sonic/apply_clet.sh" + when: apply_configlet is defined and apply_configlet|bool == true + - name: execute cli "config save -y" to save current minigraph as startup-config become: true shell: config save -y diff --git a/ansible/library/test_facts.py b/ansible/library/test_facts.py index 8233b13dae6..67ac2b56a2f 100644 --- a/ansible/library/test_facts.py +++ b/ansible/library/test_facts.py @@ -38,7 +38,7 @@ testcases: acl: filename: acl.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet] execvar: ptf_host: testbed_type: @@ -49,7 +49,7 @@ ptf_host: bgp_fact: filename: bgp_fact.yml - topologies: [t0, t0-64, t0-64-32, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-64, t0-64-32, t1, t1-lag, t1-64-lag, t1-64-lag-clet] ... 
To use it: @@ -85,6 +85,7 @@ t1: [acl, bgp_fact, bgp_multipath_relax, decap, everflow_testbed, fib, lldp, pfc_wd] t1-lag: [acl, bgp_fact, bgp_multipath_relax, decap, everflow_testbed, fib, lldp, lag_2, pfc_wd] t1-64-lag: [acl, bgp_fact, bgp_multipath_relax, decap, everflow_testbed, fib, lldp, lag_2, pfc_wd] + t1-64-lag-clet: [acl, bgp_fact, bgp_multipath_relax, decap, everflow_testbed, fib, lldp, lag_2, pfc_wd] } ''' diff --git a/ansible/minigraph/switch-t1-64-lag-clet.xml b/ansible/minigraph/switch-t1-64-lag-clet.xml new file mode 100644 index 00000000000..700558d38c5 --- /dev/null +++ b/ansible/minigraph/switch-t1-64-lag-clet.xml @@ -0,0 +1,2349 @@ + + + + + + ARISTA01T0 + 10.0.0.33 + switch-t1-64-lag-clet + 10.0.0.32 + 1 + 10 + 3 + + + ARISTA01T0 + FC00::42 + switch-t1-64-lag-clet + FC00::41 + 1 + 10 + 3 + + + ARISTA02T0 + 10.0.0.35 + switch-t1-64-lag-clet + 10.0.0.34 + 1 + 10 + 3 + + + ARISTA02T0 + FC00::46 + switch-t1-64-lag-clet + FC00::45 + 1 + 10 + 3 + + + ARISTA03T0 + 10.0.0.37 + switch-t1-64-lag-clet + 10.0.0.36 + 1 + 10 + 3 + + + ARISTA03T0 + FC00::4A + switch-t1-64-lag-clet + FC00::49 + 1 + 10 + 3 + + + ARISTA04T0 + 10.0.0.39 + switch-t1-64-lag-clet + 10.0.0.38 + 1 + 10 + 3 + + + ARISTA04T0 + FC00::4E + switch-t1-64-lag-clet + FC00::4D + 1 + 10 + 3 + + + ARISTA05T0 + 10.0.0.41 + switch-t1-64-lag-clet + 10.0.0.40 + 1 + 10 + 3 + + + ARISTA05T0 + FC00::52 + switch-t1-64-lag-clet + FC00::51 + 1 + 10 + 3 + + + ARISTA06T0 + 10.0.0.43 + switch-t1-64-lag-clet + 10.0.0.42 + 1 + 10 + 3 + + + ARISTA06T0 + FC00::56 + switch-t1-64-lag-clet + FC00::55 + 1 + 10 + 3 + + + ARISTA07T0 + 10.0.0.45 + switch-t1-64-lag-clet + 10.0.0.44 + 1 + 10 + 3 + + + ARISTA07T0 + FC00::5A + switch-t1-64-lag-clet + FC00::59 + 1 + 10 + 3 + + + ARISTA08T0 + 10.0.0.47 + switch-t1-64-lag-clet + 10.0.0.46 + 1 + 10 + 3 + + + ARISTA08T0 + FC00::5E + switch-t1-64-lag-clet + FC00::5D + 1 + 10 + 3 + + + ARISTA09T0 + 10.0.0.49 + switch-t1-64-lag-clet + 10.0.0.48 + 1 + 10 + 3 + + + ARISTA09T0 + 
FC00::62 + switch-t1-64-lag-clet + FC00::61 + 1 + 10 + 3 + + + ARISTA10T0 + 10.0.0.51 + switch-t1-64-lag-clet + 10.0.0.50 + 1 + 10 + 3 + + + ARISTA10T0 + FC00::66 + switch-t1-64-lag-clet + FC00::65 + 1 + 10 + 3 + + + ARISTA11T0 + 10.0.0.53 + switch-t1-64-lag-clet + 10.0.0.52 + 1 + 10 + 3 + + + ARISTA11T0 + FC00::6A + switch-t1-64-lag-clet + FC00::69 + 1 + 10 + 3 + + + ARISTA12T0 + 10.0.0.55 + switch-t1-64-lag-clet + 10.0.0.54 + 1 + 10 + 3 + + + ARISTA12T0 + FC00::6E + switch-t1-64-lag-clet + FC00::6D + 1 + 10 + 3 + + + ARISTA13T0 + 10.0.0.57 + switch-t1-64-lag-clet + 10.0.0.56 + 1 + 10 + 3 + + + ARISTA13T0 + FC00::72 + switch-t1-64-lag-clet + FC00::71 + 1 + 10 + 3 + + + ARISTA14T0 + 10.0.0.59 + switch-t1-64-lag-clet + 10.0.0.58 + 1 + 10 + 3 + + + ARISTA14T0 + FC00::76 + switch-t1-64-lag-clet + FC00::75 + 1 + 10 + 3 + + + ARISTA15T0 + 10.0.0.61 + switch-t1-64-lag-clet + 10.0.0.60 + 1 + 10 + 3 + + + ARISTA15T0 + FC00::7A + switch-t1-64-lag-clet + FC00::79 + 1 + 10 + 3 + + + ARISTA16T0 + 10.0.0.63 + switch-t1-64-lag-clet + 10.0.0.62 + 1 + 10 + 3 + + + ARISTA16T0 + FC00::7E + switch-t1-64-lag-clet + FC00::7D + 1 + 10 + 3 + + + ARISTA17T0 + 10.0.0.65 + switch-t1-64-lag-clet + 10.0.0.64 + 1 + 10 + 3 + + + ARISTA17T0 + FC00::82 + switch-t1-64-lag-clet + FC00::81 + 1 + 10 + 3 + + + ARISTA18T0 + 10.0.0.67 + switch-t1-64-lag-clet + 10.0.0.66 + 1 + 10 + 3 + + + ARISTA18T0 + FC00::86 + switch-t1-64-lag-clet + FC00::85 + 1 + 10 + 3 + + + ARISTA19T0 + 10.0.0.69 + switch-t1-64-lag-clet + 10.0.0.68 + 1 + 10 + 3 + + + ARISTA19T0 + FC00::8A + switch-t1-64-lag-clet + FC00::89 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + 10.0.0.0 + ARISTA01T2 + 10.0.0.1 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + FC00::1 + ARISTA01T2 + FC00::2 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + 10.0.0.4 + ARISTA03T2 + 10.0.0.5 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + FC00::5 + ARISTA01T2 + FC00::6 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + 10.0.0.8 + ARISTA05T2 + 10.0.0.9 + 1 + 10 + 3 + + + 
switch-t1-64-lag-clet + FC00::9 + ARISTA05T2 + FC00::A + 1 + 10 + 3 + + + switch-t1-64-lag-clet + 10.0.0.12 + ARISTA07T2 + 10.0.0.13 + 1 + 10 + 3 + + + switch-t1-64-lag-clet + FC00::D + ARISTA05T2 + FC00::E + 1 + 10 + 3 + + + + + 65100 + switch-t1-64-lag-clet + + +
10.0.0.33
+ + +
+ +
10.0.0.35
+ + +
+ +
10.0.0.37
+ + +
+ +
10.0.0.39
+ + +
+ +
10.0.0.41
+ + +
+ +
10.0.0.43
+ + +
+ +
10.0.0.45
+ + +
+ +
10.0.0.47
+ + +
+ +
10.0.0.49
+ + +
+ +
10.0.0.51
+ + +
+ +
10.0.0.53
+ + +
+ +
10.0.0.55
+ + +
+ +
10.0.0.57
+ + +
+ +
10.0.0.59
+ + +
+ +
10.0.0.61
+ + +
+ +
10.0.0.63
+ + +
+ +
10.0.0.65
+ + +
+ +
10.0.0.67
+ + +
+ +
10.0.0.69
+ + +
+ +
10.0.0.1
+ + +
+ +
10.0.0.5
+ + +
+ +
10.0.0.9
+ + +
+ +
10.0.0.13
+ + +
+
+ +
+ + 64001 + ARISTA01T0 + + + + 64002 + ARISTA02T0 + + + + 64003 + ARISTA03T0 + + + + 64004 + ARISTA04T0 + + + + 64005 + ARISTA05T0 + + + + 64006 + ARISTA06T0 + + + + 64007 + ARISTA07T0 + + + + 64008 + ARISTA08T0 + + + + 64009 + ARISTA09T0 + + + + 64010 + ARISTA10T0 + + + + 64011 + ARISTA11T0 + + + + 64012 + ARISTA12T0 + + + + 64013 + ARISTA13T0 + + + + 64014 + ARISTA14T0 + + + + 64015 + ARISTA15T0 + + + + 64016 + ARISTA16T0 + + + + 64017 + ARISTA17T0 + + + + 64018 + ARISTA18T0 + + + + 64019 + ARISTA19T0 + + + + 65200 + ARISTA01T2 + + + + 65200 + ARISTA03T2 + + + + 65200 + ARISTA05T2 + + + + 65200 + ARISTA07T2 + + +
+
+ + + + + + HostIP + Loopback0 + + 10.1.0.32/32 + + 10.1.0.32/32 + + + HostIP1 + Loopback0 + + FC00:1::32/128 + + FC00:1::32/128 + + + + + HostIP + eth0 + + 10.64.247.225/23 + + 10.64.247.225/23 + + + V6HostIP + eth0 + + FC00:2::32/64 + + FC00:2::32/64 + + + + + + switch-t1-64-lag-clet + + + PortChannelInterface + PortChannel0 + fortyGigE1/1/1;fortyGigE1/1/2 + + + + PortChannelInterface + PortChannel4 + fortyGigE1/1/5;fortyGigE1/1/6 + + + + PortChannelInterface + PortChannel8 + fortyGigE1/2/1;fortyGigE1/2/2 + + + + PortChannelInterface + PortChannel12 + fortyGigE1/2/5;fortyGigE1/2/6 + + + + PortChannelInterface + PortChannel34 + fortyGigE1/3/3 + + + + PortChannelInterface + PortChannel36 + fortyGigE1/3/5 + + + + PortChannelInterface + PortChannel37 + fortyGigE1/3/6 + + + + PortChannelInterface + PortChannel38 + fortyGigE1/3/7 + + + + PortChannelInterface + PortChannel39 + fortyGigE1/3/8 + + + + PortChannelInterface + PortChannel42 + fortyGigE1/3/11 + + + + PortChannelInterface + PortChannel44 + fortyGigE1/3/13 + + + + PortChannelInterface + PortChannel45 + fortyGigE1/3/14 + + + + PortChannelInterface + PortChannel46 + fortyGigE1/3/15 + + + + PortChannelInterface + PortChannel47 + fortyGigE1/3/16 + + + + PortChannelInterface + PortChannel50 + fortyGigE1/4/3 + + + + PortChannelInterface + PortChannel52 + fortyGigE1/4/5 + + + + PortChannelInterface + PortChannel53 + fortyGigE1/4/6 + + + + PortChannelInterface + PortChannel54 + fortyGigE1/4/7 + + + + PortChannelInterface + PortChannel55 + fortyGigE1/4/8 + + + + PortChannelInterface + PortChannel58 + fortyGigE1/4/11 + + + + PortChannelInterface + PortChannel60 + fortyGigE1/4/13 + + + + PortChannelInterface + PortChannel61 + fortyGigE1/4/14 + + + + PortChannelInterface + PortChannel62 + fortyGigE1/4/15 + + + + PortChannelInterface + PortChannel63 + fortyGigE1/4/16 + + + + + + + IPInterface + + PortChannel0 + 10.0.0.0/31 + + + IPInterface + + PortChannel0 + FC00::1/126 + + + IPInterface + + PortChannel4 + 10.0.0.4/31 + + 
+ IPInterface + + PortChannel4 + FC00::5/126 + + + IPInterface + + PortChannel8 + 10.0.0.8/31 + + + IPInterface + + PortChannel8 + FC00::9/126 + + + IPInterface + + PortChannel12 + 10.0.0.12/31 + + + IPInterface + + PortChannel12 + FC00::D/126 + + + IPInterface + + PortChannel34 + 10.0.0.32/31 + + + IPInterface + + PortChannel34 + FC00::41/126 + + + IPInterface + + PortChannel36 + 10.0.0.34/31 + + + IPInterface + + PortChannel36 + FC00::45/126 + + + IPInterface + + PortChannel37 + 10.0.0.36/31 + + + IPInterface + + PortChannel37 + FC00::49/126 + + + IPInterface + + PortChannel38 + 10.0.0.38/31 + + + IPInterface + + PortChannel38 + FC00::4D/126 + + + IPInterface + + PortChannel39 + 10.0.0.40/31 + + + IPInterface + + PortChannel39 + FC00::51/126 + + + IPInterface + + PortChannel42 + 10.0.0.42/31 + + + IPInterface + + PortChannel42 + FC00::55/126 + + + IPInterface + + PortChannel44 + 10.0.0.44/31 + + + IPInterface + + PortChannel44 + FC00::59/126 + + + IPInterface + + PortChannel45 + 10.0.0.46/31 + + + IPInterface + + PortChannel45 + FC00::5D/126 + + + IPInterface + + PortChannel46 + 10.0.0.48/31 + + + IPInterface + + PortChannel46 + FC00::61/126 + + + IPInterface + + PortChannel47 + 10.0.0.50/31 + + + IPInterface + + PortChannel47 + FC00::65/126 + + + IPInterface + + PortChannel50 + 10.0.0.52/31 + + + IPInterface + + PortChannel50 + FC00::69/126 + + + IPInterface + + PortChannel52 + 10.0.0.54/31 + + + IPInterface + + PortChannel52 + FC00::6D/126 + + + IPInterface + + PortChannel53 + 10.0.0.56/31 + + + IPInterface + + PortChannel53 + FC00::71/126 + + + IPInterface + + PortChannel54 + 10.0.0.58/31 + + + IPInterface + + PortChannel54 + FC00::75/126 + + + IPInterface + + PortChannel55 + 10.0.0.60/31 + + + IPInterface + + PortChannel55 + FC00::79/126 + + + IPInterface + + PortChannel58 + 10.0.0.62/31 + + + IPInterface + + PortChannel58 + FC00::7D/126 + + + IPInterface + + PortChannel60 + 10.0.0.64/31 + + + IPInterface + + PortChannel60 + FC00::81/126 + + + IPInterface + + 
PortChannel61 + 10.0.0.66/31 + + + IPInterface + + PortChannel61 + FC00::85/126 + + + IPInterface + + PortChannel62 + 10.0.0.68/31 + + + IPInterface + + PortChannel62 + FC00::89/126 + + + + + + + + + + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/1/1 + ARISTA01T2 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/1/2 + ARISTA01T2 + Ethernet2 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/1/5 + ARISTA03T2 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/1/6 + ARISTA03T2 + Ethernet2 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/2/1 + ARISTA05T2 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/2/2 + ARISTA05T2 + Ethernet2 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/2/5 + ARISTA07T2 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/2/6 + ARISTA07T2 + Ethernet2 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/3 + ARISTA01T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/3 + ARISTA01T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/6 + ARISTA01T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/7 + ARISTA01T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/8 + ARISTA01T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/11 + ARISTA02T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/11 + ARISTA02T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/14 + ARISTA02T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/15 + ARISTA02T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/3/16 + ARISTA02T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/3 + ARISTA03T0 + Ethernet1 + + + DeviceInterfaceLink + 
switch-t1-64-lag-clet + fortyGigE1/4/3 + ARISTA03T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/6 + ARISTA03T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/7 + ARISTA03T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/8 + ARISTA03T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/11 + ARISTA04T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/11 + ARISTA04T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/14 + ARISTA04T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/15 + ARISTA04T0 + Ethernet1 + + + DeviceInterfaceLink + switch-t1-64-lag-clet + fortyGigE1/4/16 + ARISTA04T0 + Ethernet1 + + + + + switch-t1-64-lag-clet + Force10-S6100 + + 10.64.247.225 + + + + "ARISTA01T0" + Arista-VM + + 10.64.247.204 + + + + "ARISTA01T2" + Arista-VM + + 10.64.247.200 + + + + "ARISTA02T0" + Arista-VM + + 10.64.247.205 + + + + "ARISTA03T0" + Arista-VM + + 10.64.247.206 + + + + "ARISTA03T2" + Arista-VM + + 10.64.247.201 + + + + "ARISTA04T0" + Arista-VM + + 10.64.247.207 + + + + "ARISTA05T0" + Arista-VM + + 10.64.247.208 + + + + "ARISTA05T2" + Arista-VM + + 10.64.247.202 + + + + "ARISTA06T0" + Arista-VM + + 10.64.247.209 + + + + "ARISTA07T0" + Arista-VM + + 10.64.247.210 + + + + "ARISTA07T2" + Arista-VM + + 10.64.247.203 + + + + "ARISTA08T0" + Arista-VM + + 10.64.247.211 + + + + "ARISTA09T0" + Arista-VM + + 10.64.247.212 + + + + "ARISTA10T0" + Arista-VM + + 10.64.247.213 + + + + "ARISTA11T0" + Arista-VM + + 10.64.247.214 + + + + "ARISTA12T0" + Arista-VM + + 10.64.247.215 + + + + "ARISTA13T0" + Arista-VM + + 10.64.247.216 + + + + "ARISTA14T0" + Arista-VM + + 10.64.247.217 + + + + "ARISTA15T0" + Arista-VM + + 10.64.247.218 + + + + "ARISTA16T0" + Arista-VM + + 10.64.247.219 + + + + "ARISTA17T0" + Arista-VM + + 10.64.247.220 + + + + "ARISTA18T0" + Arista-VM + + 10.64.247.221 + + + 
+ "ARISTA19T0" + Arista-VM + + 10.64.247.222 + + + + + + + true + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/1 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/2 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/3 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/4 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/5 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/6 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/7 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/8 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/9 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/10 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/11 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/12 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/13 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/14 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/15 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/1/16 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/1 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/2 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/3 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/4 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/5 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/6 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/7 
+ + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/8 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/9 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/10 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/11 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/12 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/13 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/14 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/15 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/2/16 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/1 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/2 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/3 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/4 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/5 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/6 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/7 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/8 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/9 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/10 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/11 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/12 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/13 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/14 + + false + 0 + 0 + 40000 + + + 
DeviceInterface + + true + true + 1 + fortyGigE1/3/15 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/3/16 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/1 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/2 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/3 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/4 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/5 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/6 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/7 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/8 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/9 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/10 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/11 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/12 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/13 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/14 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/15 + + false + 0 + 0 + 40000 + + + DeviceInterface + + true + true + 1 + fortyGigE1/4/16 + + false + 0 + 0 + 40000 + + + true + 0 + Force10-S6100 + + + + switch-t1-64-lag-clet + Force10-S6100 +
diff --git a/ansible/roles/test/files/ptftests/IP_decap_test.py b/ansible/roles/test/files/ptftests/IP_decap_test.py index a7441e5c4aa..e00b3119d3a 100644 --- a/ansible/roles/test/files/ptftests/IP_decap_test.py +++ b/ansible/roles/test/files/ptftests/IP_decap_test.py @@ -91,7 +91,7 @@ def setUp(self): self.fib = fib.Fib(self.test_params['fib_info']) if self.test_params['testbed_type'] == 't1' or self.test_params['testbed_type'] == 't1-lag': self.src_ports = range(0, 32) - if self.test_params['testbed_type'] == 't1-64-lag': + if self.test_params['testbed_type'] == 't1-64-lag' or self.test_params['testbed_type'] == 't1-64-lag-clet': self.src_ports = [0, 1, 4, 5, 16, 17, 20, 21, 34, 36, 37, 38, 39, 42, 44, 45, 46, 47, 50, 52, 53, 54, 55, 58, 60, 61, 62, 63] if self.test_params['testbed_type'] == 't0': self.src_ports = range(1, 25) + range(28, 32) diff --git a/ansible/roles/test/files/ptftests/dip_sip.py b/ansible/roles/test/files/ptftests/dip_sip.py index a05c9bea530..eb6ab52c10e 100644 --- a/ansible/roles/test/files/ptftests/dip_sip.py +++ b/ansible/roles/test/files/ptftests/dip_sip.py @@ -5,7 +5,7 @@ This test uses UDP packets to validate that HW supports routing of L3 packets with DIP=SIP Topologies: - Supports t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag and t1-64-lag topology + Supports t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag t1-64-lag and t1-64-lag-clet topology Parameters: testbed_type - testbed type @@ -185,7 +185,7 @@ def tearDown(self): #-------------------------------------------------------------------------- def runTest(self): - if self.testbed_type in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1', 't1-lag', 't1-64-lag']: + if self.testbed_type in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1', 't1-lag', 't1-64-lag', 't1-64-lag-clet']: self.log("Run PORT/LAG-router based test") test = PortLagRouterBasedTest(self) diff --git a/ansible/roles/test/files/ptftests/fib_test.py 
b/ansible/roles/test/files/ptftests/fib_test.py index d4f7fbfe7e4..89c95498005 100644 --- a/ansible/roles/test/files/ptftests/fib_test.py +++ b/ansible/roles/test/files/ptftests/fib_test.py @@ -97,7 +97,7 @@ def setUp(self): if self.test_params['testbed_type'] == 't1' or self.test_params['testbed_type'] == 't1-lag': self.src_ports = range(0, 32) - if self.test_params['testbed_type'] == 't1-64-lag': + if self.test_params['testbed_type'] == 't1-64-lag' or self.test_params['testbed_type'] == 't1-64-lag-clet': self.src_ports = [0, 1, 4, 5, 16, 17, 20, 21, 34, 36, 37, 38, 39, 42, 44, 45, 46, 47, 50, 52, 53, 54, 55, 58, 60, 61, 62, 63] if self.test_params['testbed_type'] == 't0': self.src_ports = range(1, 25) + range(28, 32) diff --git a/ansible/roles/test/files/ptftests/mtu_test.py b/ansible/roles/test/files/ptftests/mtu_test.py index 154caaf1b6c..60d7c595ab3 100644 --- a/ansible/roles/test/files/ptftests/mtu_test.py +++ b/ansible/roles/test/files/ptftests/mtu_test.py @@ -129,7 +129,7 @@ def check_ip_mtu(self): dst_port_list = [] if self.testbed_type == 't1' or self.testbed_type == 't1-lag': dst_port_list = [31] - elif self.testbed_type == 't1-64-lag': + elif self.testbed_type == 't1-64-lag' or self.testbed_type == 't1-64-lag-clet': dst_port_list = [58] (matched_index, received) = verify_packet_any_port(self, masked_exp_pkt, dst_port_list) diff --git a/ansible/roles/test/tasks/bgp_gr_helper.yml b/ansible/roles/test/tasks/bgp_gr_helper.yml index 0a03c0f8a90..efeb3182404 100644 --- a/ansible/roles/test/tasks/bgp_gr_helper.yml +++ b/ansible/roles/test/tasks/bgp_gr_helper.yml @@ -6,7 +6,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{testbed_type}} is unsupported." - when: testbed_type not in ['t1', 't1-lag', 't1-64-lag'] + when: testbed_type not in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - name: Get VM info. 
include: "roles/test/tasks/bgp_gr_helper/get_vm_info.yml" diff --git a/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml b/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml index 2f89162d18c..b0ba4af0927 100644 --- a/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml +++ b/ansible/roles/test/tasks/bgp_gr_helper/get_vm_info.yml @@ -16,7 +16,7 @@ vm_name: "{{ item.value.name }}" vm_intf: "{{ item.key }}" with_dict: "{{ minigraph_neighbors }}" - when: "testbed_type in ['t1', 't1-lag', 't1-64-lag'] and 'T0' in item.value.name and not vm_name" + when: "testbed_type in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] and 'T0' in item.value.name and not vm_name" - name: Get neighbor IPv4 address. set_fact: diff --git a/ansible/roles/test/tasks/bgp_multipath_relax.yml b/ansible/roles/test/tasks/bgp_multipath_relax.yml index d829f818fc2..0f924a0f78a 100644 --- a/ansible/roles/test/tasks/bgp_multipath_relax.yml +++ b/ansible/roles/test/tasks/bgp_multipath_relax.yml @@ -9,8 +9,8 @@ - fail: msg="please provide testbed_type for bgp_multipath_relax test" when: testbed_type is not defined -- fail: mgs="This test only works for leaf routers as DUT in topology, t1, t1-lag, t1-64-lag" - when: testbed_type not in ['t1', 't1-lag', 't1-64-lag'] +- fail: mgs="This test only works for leaf routers as DUT in topology, t1, t1-lag, t1-64-lag, t1-64-lag-clet" + when: testbed_type not in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - name: Gathering minigraph facts about the device minigraph_facts: host={{ inventory_hostname }} diff --git a/ansible/roles/test/tasks/decap.yml b/ansible/roles/test/tasks/decap.yml index cf93cf5014f..edc592a8086 100644 --- a/ansible/roles/test/tasks/decap.yml +++ b/ansible/roles/test/tasks/decap.yml @@ -39,7 +39,7 @@ - name: Expand properties into props set_fact: props="{{configuration_properties['spine']}}" - when: testbed_type in ['t1', 't1-lag', 't1-64-lag'] + when: testbed_type in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - name: Expand 
properties into props set_fact: props="{{configuration_properties['common']}}" @@ -47,7 +47,7 @@ - name: Expand properties into props set_fact: props_tor="{{configuration_properties['tor']}}" - when: testbed_type in ['t1', 't1-lag', 't1-64-lag'] + when: testbed_type in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] # Gather minigraph facts - name: Gathering minigraph facts about the device diff --git a/ansible/roles/test/tasks/dip_sip.yml b/ansible/roles/test/tasks/dip_sip.yml index 49ac22302f8..d79111dee74 100644 --- a/ansible/roles/test/tasks/dip_sip.yml +++ b/ansible/roles/test/tasks/dip_sip.yml @@ -2,7 +2,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{ test_type }} is invalid" - when: testbed_type not in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1', 't1-lag', 't1-64-lag'] + when: testbed_type not in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - include_vars: "vars/topo_{{ testbed_type }}.yml" @@ -126,4 +126,4 @@ vars: dst_lag: "default('')" src_lag: "default('')" - when: testbed_type in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1-lag', 't1-64-lag'] + when: testbed_type in ['t0', 't0-16', 't0-56', 't0-64', 't0-64-32', 't0-116', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] diff --git a/ansible/roles/test/tasks/everflow_testbed/run_test.yml b/ansible/roles/test/tasks/everflow_testbed/run_test.yml index 847a675333d..a59faf17321 100644 --- a/ansible/roles/test/tasks/everflow_testbed/run_test.yml +++ b/ansible/roles/test/tasks/everflow_testbed/run_test.yml @@ -6,7 +6,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{testbed_type}} is invalid." 
- when: testbed_type not in ['t1-lag', 't1', 't1-64-lag'] + when: testbed_type not in ['t1-lag', 't1', 't1-64-lag', 't1-64-lag-clet'] - name: Gathering minigraph facts about the device minigraph_facts: host={{ inventory_hostname }} diff --git a/ansible/roles/test/tasks/mtu.yml b/ansible/roles/test/tasks/mtu.yml index 291b85e9de9..6e97b4e80ae 100644 --- a/ansible/roles/test/tasks/mtu.yml +++ b/ansible/roles/test/tasks/mtu.yml @@ -7,7 +7,7 @@ when: testbed_type is not defined - fail: msg="testbed_type {{testbed_type}} is invalid." - when: testbed_type not in ['t1-lag', 't1', 't1-64-lag'] + when: testbed_type not in ['t1-lag', 't1', 't1-64-lag', 't1-64-lag-clet'] - include_vars: "vars/topo_{{testbed_type}}.yml" diff --git a/ansible/roles/test/tasks/shared-fib.yml b/ansible/roles/test/tasks/shared-fib.yml index 4bd1fac2b2c..9a88cda3fb9 100644 --- a/ansible/roles/test/tasks/shared-fib.yml +++ b/ansible/roles/test/tasks/shared-fib.yml @@ -14,7 +14,7 @@ - name: Expand properties into props set_fact: props="{{configuration_properties['spine']}}" - when: testbed_type in ['t1', 't1-lag', 't1-64-lag'] + when: testbed_type in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - name: Expand properties into props set_fact: props="{{configuration_properties['common']}}" @@ -22,7 +22,7 @@ - name: Expand ToR properties into props set_fact: props_tor="{{configuration_properties['tor']}}" - when: testbed_type in ['t1', 't1-lag', 't1-64-lag'] + when: testbed_type in ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet'] - name: Gathering minigraph facts about the device minigraph_facts: host={{ inventory_hostname }} diff --git a/ansible/roles/test/templates/fib.j2 b/ansible/roles/test/templates/fib.j2 index 6044ba63ca4..be18fc23814 100644 --- a/ansible/roles/test/templates/fib.j2 +++ b/ansible/roles/test/templates/fib.j2 @@ -4,7 +4,7 @@ {% elif testbed_type == 't0' or testbed_type == 't0-52'or testbed_type == 't0-64' or testbed_type == 't1-lag' or testbed_type == 't0-64-32' %} 0.0.0.0/0 {% for 
portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} -{% elif testbed_type == 't1-64-lag' %} +{% elif (testbed_type == 't1-64-lag') or (testbed_type == 't1-64-lag-clet') %} 0.0.0.0/0 [0 1] [4 5] [16 17] [20 21] {% elif testbed_type == 't0-116' %} 0.0.0.0/0 [24 25] [26 27] [28 29] [30 31] @@ -27,7 +27,7 @@ 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 {% for portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} -{% elif testbed_type == 't1-64-lag' %} +{% elif (testbed_type == 't1-64-lag') or (testbed_type == 't1-64-lag-clet') %} 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 [0 1] [4 5] [16 17] [20 21] 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 [0 1] [4 5] [16 17] [20 21] diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index ff181c4a274..5f9aa5bae45 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -1,28 +1,28 @@ testcases: acl: filename: acltb.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: arp: filename: arpall.yml - topologies: [ptf32, ptf64, t1, t1-lag, t1-64-lag] + topologies: [ptf32, ptf64, t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: bgp_fact: filename: bgp_fact.yml - topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] bgp_gr_helper: filename: bgp_gr_helper.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, 
t1-64-lag-clet] bgp_multipath_relax: filename: bgp_multipath_relax.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: @@ -36,11 +36,11 @@ testcases: config: filename: config.yml - topologies: [t1-lag, t1-64-lag, t0, t0-64, t0-116] + topologies: [t1-lag, t1-64-lag, t1-64-lag-clet, t0, t0-64, t0-116] continuous_reboot: filename: continuous_reboot.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] copp: filename: copp.yml @@ -50,7 +50,7 @@ testcases: decap: filename: decap.yml - topologies: [t1, t1-lag, t1-64-lag, t0, t0-52, t0-56, t0-64, t0-116] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet, t0, t0-52, t0-56, t0-64, t0-116] required_vars: ptf_host: testbed_type: @@ -68,7 +68,7 @@ testcases: everflow_testbed: filename: everflow_testbed.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: @@ -110,14 +110,14 @@ testcases: fib: filename: simple-fib.yml - topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: warm-reboot-fib: filename: warm-reboot-fib.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: @@ -146,26 +146,26 @@ testcases: lag_2: filename: lag_2.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: lldp: filename: lldp.yml - topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-116, 
t0-64-32, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-116, t0-64-32, t1, t1-lag, t1-64-lag, t1-64-lag-clet] link_flap: filename: link_flap.yml - topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] mem_check: filename: mem_check.yml - topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] mtu: filename: mtu.yml - topologies: [t1, t1-lag, t1-64-lag] + topologies: [t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: @@ -178,19 +178,19 @@ testcases: neighbour_mac_noptf: filename: neighbour-mac-noptf.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] ntp: filename: ntp.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] pfc_wd: filename: pfc_wd.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] port_toggle: filename: port_toggle.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] qos: filename: qos.yml @@ -202,39 +202,39 @@ testcases: reboot: filename: reboot.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, 
t1-64-lag-clet, ptf32, ptf64] repeat_harness: filename: repeat_harness.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] restart_swss: filename: run_config_cleanup.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] restart_swss_service: filename: restart_swss.yml - topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-16, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] restart_syncd: filename: restart_syncd.yml - topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] sensors: filename: sensors_check.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] service_acl: filename: service_acl.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] snmp: filename: snmp.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] syslog: filename: syslog.yml - topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, ptf32, ptf64] + topologies: [t0, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] vlan: filename: vlantb.yml @@ -249,7 +249,7 @@ testcases: dip_sip: filename: dip_sip.yml - topologies: [t0, t0-16, 
t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag] + topologies: [t0, t0-16, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] required_vars: ptf_host: testbed_type: diff --git a/ansible/testbed-new.yaml b/ansible/testbed-new.yaml index a28ce7764f3..2793a692549 100644 --- a/ansible/testbed-new.yaml +++ b/ansible/testbed-new.yaml @@ -227,7 +227,7 @@ veos_groups: servers: children: [server_1, server_2] # source: sonic-mgmt/veos vars: - topologies: ['t1', 't1-lag', 't1-64-lag', 't0', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] # source: sonic-mgmt/veos + topologies: ['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet', 't0', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] # source: sonic-mgmt/veos server_1: children: [vm_host_1, vms_1] # source: sonic-mgmt/veos vars: diff --git a/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh b/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh new file mode 100755 index 00000000000..99833f4438f --- /dev/null +++ b/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh @@ -0,0 +1,6 @@ +#! 
/bin/bash + +# Sleep to let all BGP sessions go up & running before adding a T0 +sleep 1m +/usr/bin/configlet -j /etc/sonic/clet-to_clear.json -d +/usr/bin/configlet -j /etc/sonic/clet-add_20T0.json -u diff --git a/ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json b/ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json new file mode 100644 index 00000000000..411c339ea21 --- /dev/null +++ b/ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json @@ -0,0 +1,221 @@ +[ + { + "PORT": { + "Ethernet63": { + "description": "ARISTA20T0:Ethernet1", + "mtu": "9100", + "admin_status": "down" + } + } + }, + { + "PORTCHANNEL": { + "PortChannel0024": { + "admin_status": "up", + "min_links": "1", + "mtu": "9100" + } + } + }, + { + "PORTCHANNEL_MEMBER": { + "PortChannel0024|Ethernet63": {} + } + }, + { + "PORTCHANNEL_INTERFACE": { + "PortChannel0024|10.0.0.70/31": {}, + "PortChannel0024|FC00::8D/126": {} + } + }, + { + "ACL_TABLE": { + "EVERFLOW": { + "type": "MIRROR", + "policy_desc": "EVERFLOW", + "ports": [ + "PortChannel0001", + "PortChannel0002", + "PortChannel0003", + "PortChannel0004", + "PortChannel0005", + "PortChannel0006", + "PortChannel0007", + "PortChannel0008", + "PortChannel0009", + "PortChannel0010", + "PortChannel0011", + "PortChannel0012", + "PortChannel0013", + "PortChannel0014", + "PortChannel0015", + "PortChannel0016", + "PortChannel0017", + "PortChannel0018", + "PortChannel0019", + "PortChannel0020", + "PortChannel0021", + "PortChannel0022", + "PortChannel0023", + "PortChannel0024" + ] + }, + "EVERFLOWV6": { + "type": "MIRRORV6", + "policy_desc": "EVERFLOWV6", + "ports": [ + "PortChannel0001", + "PortChannel0002", + "PortChannel0003", + "PortChannel0004", + "PortChannel0005", + "PortChannel0006", + "PortChannel0007", + "PortChannel0008", + "PortChannel0009", + "PortChannel0010", + "PortChannel0011", + "PortChannel0012", + "PortChannel0013", + "PortChannel0014", + "PortChannel0015", + "PortChannel0016", + "PortChannel0017", + "PortChannel0018", + 
"PortChannel0019", + "PortChannel0020", + "PortChannel0021", + "PortChannel0022", + "PortChannel0023", + "PortChannel0024" + ] + } + } + }, + { + "DEVICE_NEIGHBOR": { + "Ethernet63": { + "name": "ARISTA20T0", + "port": "Ethernet1" + } + } + }, + { + "DEVICE_NEIGHBOR_METADATA": { + "ARISTA20T0": { + "lo_addr": "None", + "mgmt_addr": "10.64.247.223", + "hwsku": "Arista-VM", + "type": "ToRRouter", + "deployment_id": "2" + } + } + }, + { + "CABLE_LENGTH": { + "AZURE": { + "Ethernet63": "300m" + } + } + }, + { + "QUEUE": { + "Ethernet63|0": { + "scheduler": "[SCHEDULER|scheduler.0]" + }, + "Ethernet63|1": { + "scheduler": "[SCHEDULER|scheduler.0]" + }, + "Ethernet63|2": { + "scheduler": "[SCHEDULER|scheduler.0]" + }, + "Ethernet63|3": { + "wred_profile": "[WRED_PROFILE|AZURE_LOSSLESS]", + "scheduler": "[SCHEDULER|scheduler.1]" + }, + "Ethernet63|4": { + "wred_profile": "[WRED_PROFILE|AZURE_LOSSLESS]", + "scheduler": "[SCHEDULER|scheduler.1]" + }, + "Ethernet63|5": { + "scheduler": "[SCHEDULER|scheduler.0]" + }, + "Ethernet63|6": { + "scheduler": "[SCHEDULER|scheduler.0]" + } + } + }, + { + "BUFFER_PG": { + "Ethernet63|0": { + "profile": "[BUFFER_PROFILE|ingress_lossy_profile]" + } + } + }, + { + "BUFFER_QUEUE": { + "Ethernet63|0-2": { + "profile": "[BUFFER_PROFILE|egress_lossy_profile]" + }, + "Ethernet63|3-4": { + "profile": "[BUFFER_PROFILE|egress_lossless_profile]" + }, + "Ethernet63|5-6": { + "profile": "[BUFFER_PROFILE|egress_lossy_profile]" + } + + } + }, + { + "PORT_QOS_MAP": { + "Ethernet63": { + "tc_to_pg_map": "[TC_TO_PRIORITY_GROUP_MAP|AZURE]", + "tc_to_queue_map": "[TC_TO_QUEUE_MAP|AZURE]", + "pfc_enable": "3,4", + "pfc_to_queue_map": "[MAP_PFC_PRIORITY_TO_QUEUE|AZURE]", + "dscp_to_tc_map": "[DSCP_TO_TC_MAP|AZURE]" + } + } + }, + { + "PFC_WD": { + "Ethernet63": { + "action": "drop", + "detection_time": "400", + "restoration_time": "400" + } + } + }, + { + "BGP_NEIGHBOR": { + "10.0.0.71": { + "rrclient": "0", + "name": "ARISTA20T0", + "local_addr": 
"10.0.0.70", + "nhopself": "0", + "admin_status": "up", + "holdtime": "10", + "asn": "64020", + "keepalive": "3" + }, + "fc00::8e": { + "rrclient": "0", + "name": "ARISTA20T0", + "local_addr": "fc00::8d", + "nhopself": "0", + "admin_status": "up", + "holdtime": "10", + "asn": "64020", + "keepalive": "3" + } + } + }, + { + "PORT": { + "Ethernet63": { + "admin_status": "up" + } + } + } +] + diff --git a/ansible/vars/configlet/t1-64-lag-clet/clet-to_clear.json b/ansible/vars/configlet/t1-64-lag-clet/clet-to_clear.json new file mode 100644 index 00000000000..ade4cac87e3 --- /dev/null +++ b/ansible/vars/configlet/t1-64-lag-clet/clet-to_clear.json @@ -0,0 +1,9 @@ +[ + { + "ACL_TABLE": { + "EVERFLOW": {}, + "EVERFLOWV6": {} + } + } +] + diff --git a/ansible/vars/topo_t1-64-lag-clet.yml b/ansible/vars/topo_t1-64-lag-clet.yml new file mode 100644 index 00000000000..91bad53bfc6 --- /dev/null +++ b/ansible/vars/topo_t1-64-lag-clet.yml @@ -0,0 +1,683 @@ +topology: + VMs: + ARISTA01T2: + vlans: + - 0 + - 1 + vm_offset: 0 + ARISTA03T2: + vlans: + - 4 + - 5 + vm_offset: 1 + ARISTA05T2: + vlans: + - 16 + - 17 + vm_offset: 2 + ARISTA07T2: + vlans: + - 20 + - 21 + vm_offset: 3 + ARISTA01T0: + vlans: + - 34 + vm_offset: 4 + ARISTA02T0: + vlans: + - 36 + vm_offset: 5 + ARISTA03T0: + vlans: + - 37 + vm_offset: 6 + ARISTA04T0: + vlans: + - 38 + vm_offset: 7 + ARISTA05T0: + vlans: + - 39 + vm_offset: 8 + ARISTA06T0: + vlans: + - 42 + vm_offset: 9 + ARISTA07T0: + vlans: + - 44 + vm_offset: 10 + ARISTA08T0: + vlans: + - 45 + vm_offset: 11 + ARISTA09T0: + vlans: + - 46 + vm_offset: 12 + ARISTA10T0: + vlans: + - 47 + vm_offset: 13 + ARISTA11T0: + vlans: + - 50 + vm_offset: 14 + ARISTA12T0: + vlans: + - 52 + vm_offset: 15 + ARISTA13T0: + vlans: + - 53 + vm_offset: 16 + ARISTA14T0: + vlans: + - 54 + vm_offset: 17 + ARISTA15T0: + vlans: + - 55 + vm_offset: 18 + ARISTA16T0: + vlans: + - 58 + vm_offset: 19 + ARISTA17T0: + vlans: + - 60 + vm_offset: 20 + ARISTA18T0: + vlans: + - 61 + vm_offset: 21 
+ ARISTA19T0: + vlans: + - 62 + vm_offset: 22 + +configuration_properties: + common: + dut_asn: 65100 + dut_type: LeafRouter + nhipv4: 10.10.246.100 + nhipv6: FC0A::C9 + spine: + swrole: spine + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + leaf_asn_start: 62001 + tor_asn_start: 65501 + failure_rate: 0 + tor: + swrole: tor + tor_subnet_number: 5 + +configuration: + ARISTA01T2: + properties: + - common + - spine + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.0 + - FC00::1 + interfaces: + Loopback0: + ipv4: 100.1.0.1/32 + ipv6: 2064:100::1/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.1/31 + ipv6: fc00::2/126 + bp_interface: + ipv4: 10.10.246.1/24 + ipv6: fc0a::2/64 + + ARISTA03T2: + properties: + - common + - spine + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.4 + - FC00::5 + interfaces: + Loopback0: + ipv4: 100.1.0.3/32 + ipv6: 2064:100::3/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.5/31 + ipv6: fc00::6/126 + bp_interface: + ipv4: 10.10.246.3/24 + ipv6: fc0a::6/64 + + ARISTA05T2: + properties: + - common + - spine + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.8 + - FC00::9 + interfaces: + Loopback0: + ipv4: 100.1.0.5/32 + ipv6: 2064:100::5/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.9/31 + ipv6: fc00::a/126 + bp_interface: + ipv4: 10.10.246.5/24 + ipv6: fc0a::a/64 + + ARISTA07T2: + properties: + - common + - spine + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.12 + - FC00::D + interfaces: + Loopback0: + ipv4: 100.1.0.7/32 + ipv6: 2064:100::7/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.13/31 + ipv6: fc00::e/126 + bp_interface: + ipv4: 10.10.246.7/24 + ipv6: fc0a::e/64 + + ARISTA01T0: + properties: + - common + - tor + tornum: 1 + bgp: + asn: 64001 + peers: + 65100: + - 10.0.0.32 + - FC00::41 + interfaces: + Loopback0: + ipv4: 100.1.0.17/32 + ipv6: 2064:100::11/128 + Ethernet1: + lacp: 1 + 
Port-Channel1: + ipv4: 10.0.0.33/31 + ipv6: fc00::42/126 + bp_interface: + ipv4: 10.10.246.17/24 + ipv6: fc0a::22/64 + + ARISTA02T0: + properties: + - common + - tor + tornum: 2 + bgp: + asn: 64002 + peers: + 65100: + - 10.0.0.34 + - FC00::45 + interfaces: + Loopback0: + ipv4: 100.1.0.18/32 + ipv6: 2064:100::12/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.35/31 + ipv6: fc00::46/126 + bp_interface: + ipv4: 10.10.246.18/24 + ipv6: fc0a::25/64 + + ARISTA03T0: + properties: + - common + - tor + tornum: 3 + bgp: + asn: 64003 + peers: + 65100: + - 10.0.0.36 + - FC00::49 + interfaces: + Loopback0: + ipv4: 100.1.0.19/32 + ipv6: 2064:100::13/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.37/31 + ipv6: fc00::4a/126 + bp_interface: + ipv4: 10.10.246.19/24 + ipv6: fc0a::26/64 + vips: + ipv4: + prefixes: + - 200.0.1.0/26 + asn: 64700 + + ARISTA04T0: + properties: + - common + - tor + tornum: 4 + bgp: + asn: 64004 + peers: + 65100: + - 10.0.0.38 + - FC00::4D + interfaces: + Loopback0: + ipv4: 100.1.0.20/32 + ipv6: 2064:100::14/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.39/31 + ipv6: fc00::4e/126 + bp_interface: + ipv4: 10.10.246.20/24 + ipv6: fc0a::29/64 + + ARISTA05T0: + properties: + - common + - tor + tornum: 5 + bgp: + asn: 64005 + peers: + 65100: + - 10.0.0.40 + - FC00::51 + interfaces: + Loopback0: + ipv4: 100.1.0.21/32 + ipv6: 2064:100::15/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.41/31 + ipv6: fc00::52/126 + bp_interface: + ipv4: 10.10.246.21/24 + ipv6: fc0a::2a/64 + vips: + ipv4: + prefixes: + - 200.0.1.0/26 + asn: 64700 + + ARISTA06T0: + properties: + - common + - tor + tornum: 6 + bgp: + asn: 64006 + peers: + 65100: + - 10.0.0.42 + - FC00::55 + interfaces: + Loopback0: + ipv4: 100.1.0.22/32 + ipv6: 2064:100::16/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.43/31 + ipv6: fc00::56/126 + bp_interface: + ipv4: 10.10.246.22/24 + ipv6: fc0a::2d/64 + + ARISTA07T0: + properties: + - common + - tor + 
tornum: 7 + bgp: + asn: 64007 + peers: + 65100: + - 10.0.0.44 + - FC00::59 + interfaces: + Loopback0: + ipv4: 100.1.0.23/32 + ipv6: 2064:100::17/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.45/31 + ipv6: fc00::5a/126 + bp_interface: + ipv4: 10.10.246.23/24 + ipv6: fc0a::2e/64 + + ARISTA08T0: + properties: + - common + - tor + tornum: 8 + bgp: + asn: 64008 + peers: + 65100: + - 10.0.0.46 + - FC00::5D + interfaces: + Loopback0: + ipv4: 100.1.0.24/32 + ipv6: 2064:100::18/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.47/31 + ipv6: fc00::5e/126 + bp_interface: + ipv4: 10.10.246.24/24 + ipv6: fc0a::31/64 + + ARISTA09T0: + properties: + - common + - tor + tornum: 9 + bgp: + asn: 64009 + peers: + 65100: + - 10.0.0.48 + - FC00::61 + interfaces: + Loopback0: + ipv4: 100.1.0.25/32 + ipv6: 2064:100::19/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.49/31 + ipv6: fc00::62/126 + bp_interface: + ipv4: 10.10.246.25/24 + ipv6: fc0a::32/64 + + ARISTA10T0: + properties: + - common + - tor + tornum: 10 + bgp: + asn: 64010 + peers: + 65100: + - 10.0.0.50 + - FC00::65 + interfaces: + Loopback0: + ipv4: 100.1.0.26/32 + ipv6: 2064:100::1a/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.51/31 + ipv6: fc00::66/126 + bp_interface: + ipv4: 10.10.246.26/24 + ipv6: fc0a::35/64 + + ARISTA11T0: + properties: + - common + - tor + tornum: 11 + bgp: + asn: 64011 + peers: + 65100: + - 10.0.0.52 + - FC00::69 + interfaces: + Loopback0: + ipv4: 100.1.0.27/32 + ipv6: 2064:100::1b/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.53/31 + ipv6: fc00::6a/126 + bp_interface: + ipv4: 10.10.246.27/24 + ipv6: fc0a::36/64 + + ARISTA12T0: + properties: + - common + - tor + tornum: 12 + bgp: + asn: 64012 + peers: + 65100: + - 10.0.0.54 + - FC00::6D + interfaces: + Loopback0: + ipv4: 100.1.0.28/32 + ipv6: 2064:100::1c/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.55/31 + ipv6: fc00::6e/126 + bp_interface: + ipv4: 10.10.246.28/24 + ipv6: 
fc0a::39/64 + + ARISTA13T0: + properties: + - common + - tor + tornum: 13 + bgp: + asn: 64013 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.29/32 + ipv6: 2064:100::1d/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.29/24 + ipv6: fc0a::3a/64 + + ARISTA14T0: + properties: + - common + - tor + tornum: 14 + bgp: + asn: 64014 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.30/32 + ipv6: 2064:100::1e/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 10.10.246.30/24 + ipv6: fc0a::3d/64 + + ARISTA15T0: + properties: + - common + - tor + tornum: 15 + bgp: + asn: 64015 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.31/32 + ipv6: 2064:100::1f/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 10.10.246.31/24 + ipv6: fc0a::3e/64 + + ARISTA16T0: + properties: + - common + - tor + tornum: 16 + bgp: + asn: 64016 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.32/32 + ipv6: 2064:100::20/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.32/24 + ipv6: fc0a::41/64 + + ARISTA17T0: + properties: + - common + - tor + tornum: 17 + bgp: + asn: 64017 + peers: + 65100: + - 10.0.0.64 + - FC00::81 + interfaces: + Loopback0: + ipv4: 100.1.0.33/32 + ipv6: 2064:100::21/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.65/31 + ipv6: fc00::82/126 + bp_interface: + ipv4: 10.10.246.33/24 + ipv6: fc0a::43/64 + + ARISTA18T0: + properties: + - common + - tor + tornum: 18 + bgp: + asn: 64018 + peers: + 65100: + - 10.0.0.66 + - FC00::85 + interfaces: + Loopback0: + ipv4: 100.1.0.34/32 + ipv6: 2064:100::22/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.67/31 + ipv6: 
fc00::86/126 + bp_interface: + ipv4: 10.10.246.34/24 + ipv6: fc0a::45/64 + + ARISTA19T0: + properties: + - common + - tor + tornum: 19 + bgp: + asn: 64019 + peers: + 65100: + - 10.0.0.68 + - FC00::89 + interfaces: + Loopback0: + ipv4: 100.1.0.35/32 + ipv6: 2064:100::23/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.69/31 + ipv6: fc00::8a/126 + bp_interface: + ipv4: 10.10.246.35/24 + ipv6: fc0a::47/64 diff --git a/ansible/veos b/ansible/veos index 2a18fb4714c..4feebd7c7a3 100644 --- a/ansible/veos +++ b/ansible/veos @@ -108,4 +108,4 @@ server_1 server_2 [servers:vars] -topologies=['t1', 't1-lag', 't1-64-lag', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] +topologies=['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] diff --git a/ansible/veos.vtb b/ansible/veos.vtb index 4ea5a7af40e..f44d28fd189 100644 --- a/ansible/veos.vtb +++ b/ansible/veos.vtb @@ -26,7 +26,7 @@ host_var_file=host_vars/STR-ACS-VSERV-01.yml server_1 [servers:vars] -topologies=['t1', 't1-lag', 't1-64-lag', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] +topologies=['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] [sonic] vlab-01 ansible_host=10.250.0.101 type=kvm diff --git a/tests/veos.vtb b/tests/veos.vtb index 54527ea9b88..e063e55e116 100644 --- a/tests/veos.vtb +++ b/tests/veos.vtb @@ -26,7 +26,7 @@ host_var_file=host_vars/STR-ACS-VSERV-01.yml server_1 [servers:vars] -topologies=['t1', 't1-lag', 't1-64-lag', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] +topologies=['t1', 't1-lag', 't1-64-lag', 't1-64-lag-clet', 't0', 't0-16', 't0-56', 't0-52', 'ptf32', 'ptf64', 't0-64', 't0-64-32', 't0-116'] [sonic] vlab-01 ansible_host=10.250.0.101 type=kvm hwsku=Force10-S6000 ansible_password=password ansible_user=admin From 
7487c8aaac539bffe0f777ba27b2cb8900f8df80 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Thu, 28 Nov 2019 16:51:15 -0800 Subject: [PATCH 152/218] [test behavior] disable allow_recover by default (#1241) Allow_recover knob controls the behavior of test infrastructure. When set to true, it allows a test to recover DUT in bad state before starting test. With SONiC stability increases, we should by default disable recovery, and surface more issues. If a test failed in the middle, it should be the test itself to exit with a healthy DUT. Signed-off-by: Ying Xie --- ansible/roles/test/tasks/test_sonic_by_testname.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/test_sonic_by_testname.yml b/ansible/roles/test/tasks/test_sonic_by_testname.yml index 882a968d052..140bb06ed74 100644 --- a/ansible/roles/test/tasks/test_sonic_by_testname.yml +++ b/ansible/roles/test/tasks/test_sonic_by_testname.yml @@ -7,7 +7,7 @@ - "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" 
- set_fact: - allow_recover: true + allow_recover: false when: allow_recover is not defined - name: do basic sanity check before each test From 65f40895da2176aec07eef4f794efba7d5b6380b Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 2 Dec 2019 14:05:33 +0800 Subject: [PATCH 153/218] [interface.yml] Raise the failure again in the rescue block (#1247) Fix issue https://github.com/Azure/sonic-mgmt/issues/1244 Signed-off-by: Xin Wang --- ansible/roles/test/tasks/interface.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/interface.yml b/ansible/roles/test/tasks/interface.yml index 90fb9134a77..a61ef3c1769 100644 --- a/ansible/roles/test/tasks/interface.yml +++ b/ansible/roles/test/tasks/interface.yml @@ -35,11 +35,13 @@ - block: - name: Verify interfaces are up correctly assert: { that: "{{ ansible_interface_link_down_ports | length }} == 0" } + rescue: - include: check_fanout_interfaces.yml vars: check_fanout: true - - debug: msg="Not all Interfaces are up" + + - fail: msg="Not all interfaces are up" - block: - name: Verify port channel interfaces are up correctly @@ -50,9 +52,12 @@ - include: check_sw_vm_interfaces.yml vars: check_vms: true - - debug: msg="Not all PortChannels are up '{{ portchannel_status['stdout_lines'] }}' " + + - debug: msg="PortChannel status '{{ portchannel_status['stdout_lines'] }}'" when: portchannel_status is defined + - fail: msg="Not all PortChannels are up" + - name: Verify VLAN interfaces are up correctly assert: { that: "'{{ ansible_interface_facts[item]['active'] }}' == 'True'" } with_items: "{{ minigraph_vlans.keys() }}" From ac4f7ed1fa1f73773474e2a97ec8e4aceb06cc07 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 3 Dec 2019 19:02:39 -0800 Subject: [PATCH 154/218] Break up Pfcwd test into subtests (#1258) Enable each of the Pfcwd test (config, functional, timer, all storm) to be run as a subtest. 
Signed-off-by: Neetha John --- ansible/roles/test/tasks/pfc_wd.yml | 19 ++++++++++++++++ .../check_timer_accuracy_test.yml | 22 +++++++++++++++++++ .../functional_test/functional_test.yml | 11 ---------- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/ansible/roles/test/tasks/pfc_wd.yml b/ansible/roles/test/tasks/pfc_wd.yml index f13afc41807..030df4ebdc6 100644 --- a/ansible/roles/test/tasks/pfc_wd.yml +++ b/ansible/roles/test/tasks/pfc_wd.yml @@ -129,21 +129,40 @@ minigraph_vlan_interfaces: [] when: minigraph_vlan_interfaces is undefined + - name: Set timers + set_fact: + pfc_wd_detect_time: 400 + pfc_wd_restore_time: 400 + pfc_wd_restore_time_large: 3000 + pfc_wd_poll_time: 400 + + - name: Set polling interval {{ pfc_wd_poll_time }}. + shell: "pfcwd interval {{ pfc_wd_poll_time }}" + become: yes + - block: + - name: Clean up config + command: pfcwd stop + become: yes + - name: Test PFC WD configuration validation. vars: pfc_wd_template: roles/test/templates/pfc_wd_config.j2 include: roles/test/tasks/pfc_wd/config_test/config_test.yml + when: subtest is undefined or (subtest is defined and 'pfc_config' in subtest) - name: Test PFC WD Functional tests. include: roles/test/tasks/pfc_wd/functional_test/functional_test.yml with_dict: "{{select_test_ports}}" + when: subtest is undefined or (subtest is defined and 'pfc_functional' in subtest) - name: Test PFC WD Timer accuracy. 
include: roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml + when: subtest is undefined or (subtest is defined and 'pfc_timer' in subtest) - name: Test PFC WD extreme case when all ports have storm include: roles/test/tasks/pfc_wd/functional_test/storm_all_test.yml + when: subtest is undefined or (subtest is defined and 'pfc_all_port_storm' in subtest) - name: Set vlan members set_fact: diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml index cc0b03a582f..dc9a6d00f08 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/check_timer_accuracy_test.yml @@ -1,6 +1,28 @@ # Verify timers - set_fact: pfc_wd_test_port: "{{test_ports.keys()[0]}}" + testname: functional_test + +- conn_graph_facts: host={{test_ports[pfc_wd_test_port]['peer_device']}} + connection: local + become: no + +- name: Prepare variables required for PFC test + set_fact: + pfc_gen_file: pfc_gen.py + pfc_queue_index: 4 + pfc_frames_number: 100000 + pfc_wd_test_pkt_count: 100 + pfc_fanout_interface: "{{neighbors[pfc_wd_test_port]['peerport']}}" + peer_hwsku: "{{device_info['HwSku']}}" + peer_mgmt: "{{device_info['mgmtip']}}" + peer_login: "{{switch_login[hwsku_map[device_info['HwSku']]]}}" + +- name: set pfc storm templates based on fanout platform sku + include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml + +- name: Deploy pfc packet generater file to fanout switch + include: roles/test/tasks/pfc_wd/functional_test/deploy_pfc_pktgen.yml - block: - name: Apply config with proper timers to {{ pfc_wd_test_port }}. 
diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test.yml b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test.yml index 5240886e50d..a76e194c4d9 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test.yml +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/functional_test.yml @@ -66,22 +66,11 @@ - name: set pfc storm templates based on fanout platform sku include: roles/test/tasks/pfc_wd/functional_test/set_pfc_storm_templates.yml -- name: Set timers - set_fact: - pfc_wd_detect_time: 400 - pfc_wd_restore_time: 400 - pfc_wd_restore_time_large: 3000 - pfc_wd_poll_time: 400 - - name: Set timers if user has specified set_fact: pfc_wd_restore_time_large: "{{restore_time}}" when: restore_time is defined -- name: Set polling interval {{ pfc_wd_poll_time }}. - shell: "pfcwd interval {{ pfc_wd_poll_time }}" - become: yes - - name: Set timers 2 set_fact: pfc_wait_for_restore_time: "{{(pfc_wd_restore_time_large | int / 1000 * 2) | int}}" From 47d98fec34ecb9ca3bda33233210e56bc7e3e792 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Mon, 9 Dec 2019 11:04:06 -0800 Subject: [PATCH 155/218] [201811][dhcp_relay] Add link state change tests (#1264) --- ansible/roles/test/tasks/dhcp_relay.yml | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/ansible/roles/test/tasks/dhcp_relay.yml b/ansible/roles/test/tasks/dhcp_relay.yml index da1b5dbc682..51dada5d4af 100644 --- a/ansible/roles/test/tasks/dhcp_relay.yml +++ b/ansible/roles/test/tasks/dhcp_relay.yml @@ -51,3 +51,102 @@ - relay_iface_mac=\"{{ relay_iface_mac }}\" - relay_iface_netmask=\"{{ minigraph_vlan_interfaces[0]['mask'] }}\" ptf_extra_options: "--relax" + +- name: Bring all uplink interfaces down + shell: ifconfig {{ item.key }} down + with_dict: minigraph_portchannels + become: true + +- name: Pause to ensure uplinks are down + pause: + seconds: 10 + +- name: Bring all uplink interfaces up + shell: ifconfig {{ item.key }} up + with_dict: 
minigraph_portchannels + become: true + +- name: Pause to ensure uplinks are up + pause: + seconds: 10 + +# Run the DHCP relay PTF test +- include: ptf_runner.yml + vars: + ptf_test_name: DHCP Relay Test + ptf_test_dir: ptftests + ptf_test_path: dhcp_relay_test.DHCPTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - hostname=\"{{ inventory_hostname }}\" + - client_port_index=\"{{ client_port_index }}\" + - client_iface_alias=\"{{ client_iface_alias }}\" + - leaf_port_indices=\"{{ leaf_port_indices }}\" + - num_dhcp_servers=\"{{ dhcp_servers | length }}\" + - server_ip=\"{{ dhcp_servers[0] }}\" + - relay_iface_ip=\"{{ minigraph_vlan_interfaces[0]['addr'] }}\" + - relay_iface_mac=\"{{ relay_iface_mac }}\" + - relay_iface_netmask=\"{{ minigraph_vlan_interfaces[0]['mask'] }}\" + ptf_extra_options: "--relax" + +- name: Stop DHCP relay service + become: true + service: + name: dhcp_relay + state: stopped + +- name: Bring all uplink interfaces down + shell: ifconfig {{ item.key }} down + with_dict: minigraph_portchannels + become: true + +- name: Pause to ensure uplinks are down + pause: + seconds: 10 + +- name: Start DHCP relay service with uplinks down + become: true + service: + name: dhcp_relay + state: restarted + +- name: Give the DHCP relay container time to start up + pause: + seconds: 30 + +- name: Bring all uplink interfaces up + shell: ifconfig {{ item.key }} up + with_dict: minigraph_portchannels + become: true + +- name: Pause to ensure uplinks are up + pause: + seconds: 10 + +# Run the DHCP relay PTF test +- include: ptf_runner.yml + vars: + ptf_test_name: DHCP Relay Test + ptf_test_dir: ptftests + ptf_test_path: dhcp_relay_test.DHCPTest + ptf_platform: remote + ptf_platform_dir: ptftests + ptf_test_params: + - hostname=\"{{ inventory_hostname }}\" + - client_port_index=\"{{ client_port_index }}\" + - client_iface_alias=\"{{ client_iface_alias }}\" + - leaf_port_indices=\"{{ leaf_port_indices }}\" + - num_dhcp_servers=\"{{ 
dhcp_servers | length }}\" + - server_ip=\"{{ dhcp_servers[0] }}\" + - relay_iface_ip=\"{{ minigraph_vlan_interfaces[0]['addr'] }}\" + - relay_iface_mac=\"{{ relay_iface_mac }}\" + - relay_iface_netmask=\"{{ minigraph_vlan_interfaces[0]['mask'] }}\" + ptf_extra_options: "--relax" + +- name: Restart DHCP relay service to ensure it is in a healthy state + become: true + service: + name: dhcp_relay + state: restarted + tags: always From 0c15a8d81474b66bf6334beb8ec7a09c917e86a9 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Wed, 11 Dec 2019 14:37:09 -0800 Subject: [PATCH 156/218] Change snmp community string before minigraph deploy (#1276) Signed-off-by: Neetha John --- ansible/config_sonic_basedon_testbed.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index f07c64aa8f5..c157d2079e8 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -120,6 +120,13 @@ become: true when: apply_configlet is defined and apply_configlet|bool == true + - name: Replace snmp community string + lineinfile: + name: /etc/sonic/snmp.yml + regexp: '^snmp_rocommunity:' + line: 'snmp_rocommunity: {{ snmp_rocommunity }}' + become: true + - name: disable automatic minigraph update if we are deploying new minigraph into SONiC lineinfile: name: /etc/sonic/updategraph.conf From cb40a400f557113edca936f6739e4458cd501770 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Thu, 12 Dec 2019 10:54:28 -0800 Subject: [PATCH 157/218] Update topo_facts from configlet, if present. (#1270) * Update topo_facts from configlet, if present. * 1) Let any topology file for configlet has the toponame with suffix as "-clet" 2) For every configlet topo, there be a corresponding non-configlet version e.g. t1-64-lag & t1-64-lag-clet. 
The configlet version would be same as the other, except for few missing VMs that are added later through configlets. But for test data, like topo_facts, we need complete info, so we use the non-configlet version of the topo file. * Removed an unused code chunk and an import * Fixed few bugs. --- ansible/config_sonic_basedon_testbed.yml | 2 +- ansible/library/testbed_vm_info.py | 3 +++ ansible/library/topo_facts.py | 4 ++++ ansible/roles/test/files/acstests/acltb_test.py | 2 +- ansible/roles/test/templates/acltb.j2 | 2 +- ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh | 2 +- .../t1-64-lag-clet/{clet-add_20T0.json => clet-add.json} | 0 7 files changed, 11 insertions(+), 4 deletions(-) rename ansible/vars/configlet/t1-64-lag-clet/{clet-add_20T0.json => clet-add.json} (100%) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index c157d2079e8..b09a835ba0d 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -150,7 +150,7 @@ - name: execute configlet application script, which applies configlets in strict order. 
become: true - shell: bash -c "/etc/sonic/apply_clet.sh" + shell: bash "/etc/sonic/apply_clet.sh" when: apply_configlet is defined and apply_configlet|bool == true - name: execute cli "config save -y" to save current minigraph as startup-config diff --git a/ansible/library/testbed_vm_info.py b/ansible/library/testbed_vm_info.py index 8882fac84ce..9b32a062794 100644 --- a/ansible/library/testbed_vm_info.py +++ b/ansible/library/testbed_vm_info.py @@ -9,6 +9,7 @@ from operator import itemgetter from itertools import groupby from collections import defaultdict +import re DOCUMENTATION = ''' module: testbed_vm_info.py @@ -43,6 +44,8 @@ class TestbedVMFacts(): """ def __init__(self, toponame, vmbase): + CLET_SUFFIX = "-clet" + toponame = re.sub(CLET_SUFFIX + "$", "", toponame) self.topofile = TOPO_PATH+'topo_'+toponame +'.yml' self.start_index = int(re.findall('VM(\d+)', vmbase)[0]) self.vmhosts = {} diff --git a/ansible/library/topo_facts.py b/ansible/library/topo_facts.py index f6749668813..19afa1801d9 100644 --- a/ansible/library/topo_facts.py +++ b/ansible/library/topo_facts.py @@ -6,6 +6,7 @@ from operator import itemgetter from itertools import groupby import yaml +import re DOCUMENTATION = ''' module: topo_facts.py @@ -26,10 +27,13 @@ def __init__(self): self.vm_topo_config = {} def get_topo_config(self, topo_name): + CLET_SUFFIX = "-clet" + if 'ptf32' in topo_name: topo_name = 't1' if 'ptf64' in topo_name: topo_name = 't1-64' + topo_name = re.sub(CLET_SUFFIX + "$", "", topo_name) topo_filename = 'vars/topo_' + topo_name + '.yml' vm_topo_config = dict() diff --git a/ansible/roles/test/files/acstests/acltb_test.py b/ansible/roles/test/files/acstests/acltb_test.py index 148fb672cc2..bf5d1ecd3e5 100644 --- a/ansible/roles/test/files/acstests/acltb_test.py +++ b/ansible/roles/test/files/acstests/acltb_test.py @@ -290,7 +290,7 @@ def runTest(self): test_result = False self.switch_info = open(self.test_params["switch_info"], 'r').readlines() - if self.testbed_type in [ 
't1', 't1-lag', 't1-64-lag' ]: + if self.testbed_type in [ 't1', 't1-lag', 't1-64-lag', 't1-64-lag-clet' ]: self.tor_ports = map(int, self.switch_info[0].rstrip(",\n").split(",")) self.spine_ports = map(int, self.switch_info[1].rstrip(",\n").split(",")) self.dest_ip_addr_spine = self.switch_info[2].strip() diff --git a/ansible/roles/test/templates/acltb.j2 b/ansible/roles/test/templates/acltb.j2 index b0865d53e7c..dedaebdc0f3 100644 --- a/ansible/roles/test/templates/acltb.j2 +++ b/ansible/roles/test/templates/acltb.j2 @@ -1,4 +1,4 @@ -{% if testbed_type == "t1" or testbed_type == "t1-lag" or testbed_type == "t1-64-lag" %} +{% if testbed_type == "t1" or testbed_type == "t1-lag" or testbed_type == "t1-64-lag" or testbed_type == "t1-64-lag-clet" %} {# tor ports #} {% for ifname, v in minigraph_neighbors.iteritems() %}{% if "T0" in v.name %}{{ '%d' % minigraph_port_indices[ifname] }},{% endif %}{% endfor %} diff --git a/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh b/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh index 99833f4438f..fa0d4a8481f 100755 --- a/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh +++ b/ansible/vars/configlet/t1-64-lag-clet/apply_clet.sh @@ -3,4 +3,4 @@ # Sleep to let all BGP sessions go up & running before adding a T0 sleep 1m /usr/bin/configlet -j /etc/sonic/clet-to_clear.json -d -/usr/bin/configlet -j /etc/sonic/clet-add_20T0.json -u +/usr/bin/configlet -j /etc/sonic/clet-add.json -u diff --git a/ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json b/ansible/vars/configlet/t1-64-lag-clet/clet-add.json similarity index 100% rename from ansible/vars/configlet/t1-64-lag-clet/clet-add_20T0.json rename to ansible/vars/configlet/t1-64-lag-clet/clet-add.json From 018c76d6bace6b191635711b55a45a37d0f80783 Mon Sep 17 00:00:00 2001 From: yvolynets-mlnx <50697593+yvolynets-mlnx@users.noreply.github.com> Date: Mon, 16 Dec 2019 01:05:28 +0200 Subject: [PATCH 158/218] [pytest]: Extended FDB test with ARP verification (#1277) Extended 
existed pytest FDB test case by adding new packet types to send: ARP request and ARP reply. This scenarios are running as separate test cases. Signed-off-by: Yuriy Volynets --- tests/fdb/test_fdb.py | 69 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/tests/fdb/test_fdb.py b/tests/fdb/test_fdb.py index b3c39873bff..4104b8b0ee9 100644 --- a/tests/fdb/test_fdb.py +++ b/tests/fdb/test_fdb.py @@ -12,6 +12,7 @@ DUMMY_MAC_PREFIX = "02:11:22:33" DUMMY_MAC_COUNT = 10 FDB_POPULATE_SLEEP_TIMEOUT = 2 +PKT_TYPES = ["ethernet", "arp_request", "arp_reply"] logger = logging.getLogger(__name__) @@ -34,6 +35,51 @@ def send_eth(ptfadapter, source_port, source_mac, dest_mac): testutils.send(ptfadapter, source_port, pkt) +def send_arp_request(ptfadapter, source_port, source_mac, dest_mac): + """ + send arp request packet + :param ptfadapter: PTF adapter object + :param source_port: source port + :param source_mac: source MAC + :param dest_mac: destination MAC + :return: + """ + pkt = testutils.simple_arp_packet(pktlen=60, + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=source_mac, + vlan_vid=0, + vlan_pcp=0, + arp_op=1, + ip_snd='10.10.1.3', + ip_tgt='10.10.1.2', + hw_snd=source_mac, + hw_tgt='ff:ff:ff:ff:ff:ff', + ) + logger.debug('send ARP request packet source port id {} smac: {} dmac: {}'.format(source_port, source_mac, dest_mac)) + testutils.send(ptfadapter, source_port, pkt) + + +def send_arp_reply(ptfadapter, source_port, source_mac, dest_mac): + """ + send arp reply packet + :param ptfadapter: PTF adapter object + :param source_port: source port + :param source_mac: source MAC + :param dest_mac: destination MAC + :return: + """ + pkt = testutils.simple_arp_packet(eth_dst=dest_mac, + eth_src=source_mac, + arp_op=2, + ip_snd='10.10.1.2', + ip_tgt='10.10.1.3', + hw_tgt=dest_mac, + hw_snd=source_mac, + ) + logger.debug('send ARP reply packet source port id {} smac: {} dmac: {}'.format(source_port, source_mac, dest_mac)) + 
testutils.send(ptfadapter, source_port, pkt) + + def send_recv_eth(ptfadapter, source_port, source_mac, dest_port, dest_mac): """ send ethernet packet and verify it on dest_port @@ -55,7 +101,7 @@ def send_recv_eth(ptfadapter, source_port, source_mac, dest_port, dest_mac): testutils.verify_packet_any_port(ptfadapter, pkt, [dest_port]) -def setup_fdb(ptfadapter, vlan_table, router_mac): +def setup_fdb(ptfadapter, vlan_table, router_mac, pkt_type): """ :param ptfadapter: PTF adapter object :param vlan_table: VLAN table map: VLAN subnet -> list of VLAN members @@ -64,6 +110,8 @@ def setup_fdb(ptfadapter, vlan_table, router_mac): fdb = {} + assert pkt_type in PKT_TYPES + for vlan in vlan_table: for member in vlan_table[vlan]: mac = ptfadapter.dataplane.get_mac(0, member) @@ -79,7 +127,14 @@ def setup_fdb(ptfadapter, vlan_table, router_mac): for i in range(DUMMY_MAC_COUNT)] for dummy_mac in dummy_macs: - send_eth(ptfadapter, member, dummy_mac, router_mac) + if pkt_type == "ethernet": + send_eth(ptfadapter, member, dummy_mac, router_mac) + elif pkt_type == "arp_request": + send_arp_request(ptfadapter, member, dummy_mac, router_mac) + elif pkt_type == "arp_reply": + send_arp_reply(ptfadapter, member, dummy_mac, router_mac) + else: + pytest.fail("Unknown option '{}'".format(pkt_type)) # put in set learned dummy MACs fdb[member].update(dummy_macs) @@ -102,7 +157,8 @@ def fdb_cleanup(ansible_adhoc, testbed): @pytest.mark.usefixtures('fdb_cleanup') -def test_fdb(ansible_adhoc, testbed, ptfadapter): +@pytest.mark.parametrize("pkt_type", PKT_TYPES) +def test_fdb(ansible_adhoc, testbed, ptfadapter, pkt_type, testbed_devices): """ 1. verify fdb forwarding in T0 topology. 2. verify show mac command on DUT for learned mac. 
@@ -111,8 +167,8 @@ def test_fdb(ansible_adhoc, testbed, ptfadapter): if testbed['topo'] not in ['t0', 't0-64', 't0-116']: pytest.skip('unsupported testbed type') - duthost = AnsibleHost(ansible_adhoc, testbed['dut']) - ptfhost = AnsibleHost(ansible_adhoc, testbed['ptf']) + duthost = testbed_devices["dut"] + ptfhost = testbed_devices["ptf"] host_facts = duthost.setup()['ansible_facts'] mg_facts = duthost.minigraph_facts(host=duthost.hostname)['ansible_facts'] @@ -133,8 +189,7 @@ def test_fdb(ansible_adhoc, testbed, ptfadapter): for ifname in mg_facts['minigraph_vlans'][vlan['attachto']]['members']: vlan_table[vlan['subnet']].append(mg_facts['minigraph_port_indices'][ifname]) - fdb = setup_fdb(ptfadapter, vlan_table, router_mac) - + fdb = setup_fdb(ptfadapter, vlan_table, router_mac, pkt_type) for vlan in vlan_table: for src, dst in itertools.combinations(vlan_table[vlan], 2): for src_mac, dst_mac in itertools.product(fdb[src], fdb[dst]): From 1639af88d8d35da46e20b678cba1bde1b5341db4 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Tue, 17 Dec 2019 18:00:49 -0800 Subject: [PATCH 159/218] [dhcp_relay] Increase duration to wait for interface state changes (#1292) --- ansible/roles/test/tasks/dhcp_relay.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ansible/roles/test/tasks/dhcp_relay.yml b/ansible/roles/test/tasks/dhcp_relay.yml index 51dada5d4af..3531e12678d 100644 --- a/ansible/roles/test/tasks/dhcp_relay.yml +++ b/ansible/roles/test/tasks/dhcp_relay.yml @@ -59,7 +59,7 @@ - name: Pause to ensure uplinks are down pause: - seconds: 10 + seconds: 20 - name: Bring all uplink interfaces up shell: ifconfig {{ item.key }} up @@ -68,7 +68,7 @@ - name: Pause to ensure uplinks are up pause: - seconds: 10 + seconds: 20 # Run the DHCP relay PTF test - include: ptf_runner.yml @@ -103,7 +103,7 @@ - name: Pause to ensure uplinks are down pause: - seconds: 10 + seconds: 20 - name: Start DHCP relay service with uplinks down become: true @@ -113,7 
+113,7 @@ - name: Give the DHCP relay container time to start up pause: - seconds: 30 + seconds: 40 - name: Bring all uplink interfaces up shell: ifconfig {{ item.key }} up @@ -122,7 +122,7 @@ - name: Pause to ensure uplinks are up pause: - seconds: 10 + seconds: 20 # Run the DHCP relay PTF test - include: ptf_runner.yml From b4e4e7e1ffce9e9d0bab952d8767c632bcae6967 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Wed, 18 Dec 2019 09:08:02 -0800 Subject: [PATCH 160/218] Drop the external flag and instead check for clet file presence. (#1295) --- ansible/config_sonic_basedon_testbed.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index b09a835ba0d..2a6ffeb2951 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -114,11 +114,21 @@ dest=/etc/sonic/minigraph.xml become: true - - name: Copy corresponding configlet files if apply_configlet=true + - name: Test if configlet script exist + stat: + path: vars/configlet/{{ topo }}/apply_clet.sh + register: stat_result + delegate_to: localhost + + - name: debug print stat_result + debug: + msg: Stat result is {{ stat_result }} + + - name: Copy corresponding configlet files if exist copy: src=vars/configlet/{{ topo }}/ dest=/etc/sonic/ become: true - when: apply_configlet is defined and apply_configlet|bool == true + when: stat_result.stat.exists is defined and stat_result.stat.exists - name: Replace snmp community string lineinfile: @@ -151,7 +161,7 @@ - name: execute configlet application script, which applies configlets in strict order. 
become: true shell: bash "/etc/sonic/apply_clet.sh" - when: apply_configlet is defined and apply_configlet|bool == true + when: stat_result.stat.exists is defined and stat_result.stat.exists - name: execute cli "config save -y" to save current minigraph as startup-config become: true From ac6da0f3f19505aac7a2aaca3949f7840ab84e4f Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Wed, 18 Dec 2019 18:14:57 -0800 Subject: [PATCH 161/218] [201811] Make tests/ptftests a proper symlink (#1298) --- tests/ptftests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 120000 tests/ptftests diff --git a/tests/ptftests b/tests/ptftests deleted file mode 100644 index bc19c3dcc38..00000000000 --- a/tests/ptftests +++ /dev/null @@ -1 +0,0 @@ -../ansible/roles/test/files/ptftests \ No newline at end of file diff --git a/tests/ptftests b/tests/ptftests new file mode 120000 index 00000000000..18ac61a62be --- /dev/null +++ b/tests/ptftests @@ -0,0 +1 @@ +../ansible/roles/test/files/ptftests/ \ No newline at end of file From 02cc53752b4541ec230ee25fe9686feaa455c96e Mon Sep 17 00:00:00 2001 From: Kebo Liu Date: Mon, 23 Dec 2019 16:45:16 +0800 Subject: [PATCH 162/218] [Bug fix] Stop pmon sensord task in PSU test to avoid fail test case (#1220) * Add function to stop sensord before starting the PSU test. The motivation is to keep sensord from printing error messages during the PSU test, which would fail the test because the log analyzer detects these error messages.
* use fixture to implement the setup and teardown * remove commented out code line * fix review comments * add more function description --- tests/platform/test_platform_info.py | 63 +++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tests/platform/test_platform_info.py b/tests/platform/test_platform_info.py index 93c30ffd674..aca94770410 100644 --- a/tests/platform/test_platform_info.py +++ b/tests/platform/test_platform_info.py @@ -20,6 +20,67 @@ CMD_PLATFORM_SYSEEPROM = "show platform syseeprom" +def check_sensord_status(ans_host): + """ + @summary: Check sensord running status by analyzing the output of "ps -x" and return the PID if it's running + @return: first return value will be a bool, True to indicate task is running. + second return value is int PID, a none -1 value for a valid PID of sensord task + """ + running_status = False + sensord_pid = -1 + pmon_ps_output = ans_host.command("docker exec pmon ps -x") + for line in pmon_ps_output["stdout_lines"]: + key_value = line.split() + if "/usr/sbin/sensord" in key_value: + running_status = True + sensord_pid = int(key_value[0]) + break + + return running_status, sensord_pid + + +def stop_pmon_sensord_task(ans_host): + """ + @summary: Stop sensord task of pmon docker if it's running. + """ + sensord_running_status, sensord_pid = check_sensord_status(ans_host) + if sensord_running_status: + ans_host.command("docker exec pmon kill -SIGTERM {}".format(sensord_pid)) + + sensord_running_status, sensord_pid = check_sensord_status(ans_host) + if sensord_running_status: + assert False, "Failed to stop sensord task before test." + else: + logging.info("sensord stopped successfully") + + +@pytest.fixture(scope="module") +def psu_test_setup_teardown(testbed_devices): + """ + @summary: Sensord task will print out error msg when detect PSU offline, + which can cause log analyzer fail the test. So stop sensord task + before test and restart it after all test finished. 
+ """ + logging.info("Starting psu test setup") + ans_host = testbed_devices["dut"] + stop_pmon_sensord_task(ans_host) + + yield + + logging.info("Starting psu test teardown") + sensord_running_status, sensord_pid = check_sensord_status(ans_host) + if not sensord_running_status: + ans_host.command("docker exec pmon supervisorctl restart lm-sensors") + time.sleep(3) + sensord_running_status, sensord_pid = check_sensord_status(ans_host) + if sensord_running_status: + logging.info("sensord task restarted, pid = {}".format(sensord_pid)) + else: + assert False, "Failed to restart sensord task after test." + else: + logging.info("sensord is running, pid = {}".format(sensord_pid)) + + def test_show_platform_summary(testbed_devices): """ @summary: Check output of 'show platform summary' @@ -71,7 +132,7 @@ def test_show_platform_psustatus(testbed_devices): check_vendor_specific_psustatus(ans_host, line) -def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): +def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller, psu_test_setup_teardown): """ @summary: Turn off/on PSU and check PSU status using 'show platform psustatus' """ From 7f42bf98ebfecb665d146b56bfc34fdf5330dbf4 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 16 Dec 2019 13:01:21 -0800 Subject: [PATCH 163/218] Break down warm boot sad into multiple testcases (#1285) Signed-off-by: Neetha John --- .../roles/test/tasks/warm-reboot-sad-bgp.yml | 22 +++++++++++++ .../test/tasks/warm-reboot-sad-lag-member.yml | 28 ++++++++++++++++ .../roles/test/tasks/warm-reboot-sad-lag.yml | 22 +++++++++++++ .../test/tasks/warm-reboot-sad-vlan-port.yml | 22 +++++++++++++ ansible/roles/test/vars/testcases.yml | 32 +++++++++++++++++++ 5 files changed, 126 insertions(+) create mode 100644 ansible/roles/test/tasks/warm-reboot-sad-bgp.yml create mode 100644 ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml create mode 100644 ansible/roles/test/tasks/warm-reboot-sad-lag.yml create 
mode 100644 ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml diff --git a/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml b/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml new file mode 100644 index 00000000000..0d514761604 --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml @@ -0,0 +1,22 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3'] + +- name: set default values vnet variables + set_fact: + vnet: False + vnet_pkts: '' + when: (vnet is not defined) or (vnet_pkts is not defined) + +- name: Warm-reboot test + include_tasks: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: "{{ pre_list }}" + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml b/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml new file mode 100644 index 00000000000..653a19e41e4 --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml @@ -0,0 +1,28 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. 
for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1'] + lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" + +- name: Add all lag member down case + set_fact: + pre_list: "{{ pre_list + ['dut_lag_member_down:2:{{ lag_memb_cnt }}', 'neigh_lag_member_down:3:{{ lag_memb_cnt }}']}}" + when: testbed_type in ['t0-64', 't0-116', 't0-64-32'] + +- name: set default values vnet variables + set_fact: + vnet: False + vnet_pkts: '' + when: (vnet is not defined) or (vnet_pkts is not defined) + +- name: Warm-reboot test + include_tasks: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: "{{ pre_list }}" + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-lag.yml b/ansible/roles/test/tasks/warm-reboot-sad-lag.yml new file mode 100644 index 00000000000..24e443d48fb --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-sad-lag.yml @@ -0,0 +1,22 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. 
for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['dut_lag_down:2', 'neigh_lag_down:3'] + +- name: set default values vnet variables + set_fact: + vnet: False + vnet_pkts: '' + when: (vnet is not defined) or (vnet_pkts is not defined) + +- name: Warm-reboot test + include_tasks: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: "{{ pre_list }}" + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml b/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml new file mode 100644 index 00000000000..7e2e773a86c --- /dev/null +++ b/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml @@ -0,0 +1,22 @@ +- name: set default reboot_limit in seconds + set_fact: + reboot_limit: 1 + when: reboot_limit is not defined + +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['vlan_port_down:4'] + +- name: set default values vnet variables + set_fact: + vnet: False + vnet_pkts: '' + when: (vnet is not defined) or (vnet_pkts is not defined) + +- name: Warm-reboot test + include_tasks: advanced-reboot.yml + vars: + reboot_type: warm-reboot + preboot_list: "{{ pre_list }}" + preboot_files: "peer_dev_info,neigh_port_info" diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 5f9aa5bae45..f3af36b2beb 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -108,6 +108,38 @@ testcases: ptf_host: vm_hosts: + warm-reboot-sad-bgp: + filename: warm-reboot-sad-bgp.yml + vtestbed_compatible: no + topologies: [t0, t0-64, t0-64-32, t0-116, t0-56] + required_vars: + ptf_host: + vm_hosts: + + warm-reboot-sad-lag: + filename: warm-reboot-sad-lag.yml + vtestbed_compatible: no + topologies: [t0, t0-64, t0-64-32, t0-116, t0-56] + required_vars: + 
ptf_host: + vm_hosts: + + warm-reboot-sad-lag-member: + filename: warm-reboot-sad-lag-member.yml + vtestbed_compatible: no + topologies: [t0, t0-64, t0-64-32, t0-116, t0-56] + required_vars: + ptf_host: + vm_hosts: + + warm-reboot-sad-vlan-port: + filename: warm-reboot-sad-vlan-port.yml + vtestbed_compatible: no + topologies: [t0, t0-64, t0-64-32, t0-116, t0-56] + required_vars: + ptf_host: + vm_hosts: + fib: filename: simple-fib.yml topologies: [t0, t0-16, t0-52, t0-56, t0-64, t0-64-32, t0-116, t1, t1-lag, t1-64-lag, t1-64-lag-clet] From 6e61b2c206e919fdfa74aafdb2dc1374238dcc97 Mon Sep 17 00:00:00 2001 From: Sujin Kang Date: Mon, 23 Dec 2019 12:22:42 -0800 Subject: [PATCH 164/218] Add HW Sku for platform info pytest (#1301) --- tests/common/mellanox_data.py | 50 ++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index 65b25cabe9a..c54a52ba2aa 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -1,5 +1,5 @@ -SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010"] +SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "Mellanox-SN2700-D48C8", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010"] SPC2_HWSKUS = ["ACS-MSN3700", "ACS-MSN3700C", "ACS-MSN3800"] SWITCH_HWSKUS = SPC1_HWSKUS + SPC2_HWSKUS @@ -28,6 +28,54 @@ "number": 32 } }, + "Mellanox-SN2700": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 + } + }, + "Mellanox-SN2700-D48C8": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + 
}, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 + } + }, "ACS-MSN2740": { "reboot": { "cold_reboot": True, From dbd70c7ad4cd4a1260be13283affe88f09cc94c5 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 9 Jan 2020 01:38:17 +0800 Subject: [PATCH 165/218] [fanout] Deploy Mellanox fanout using non-root account (#1212) (#1315) The Mellanox ONYX fanout switch can be deployed using the normal admin account rather than the root account which is not always available to users. This change removed the dependency of using the root account. Signed-off-by: Xin Wang --- ansible/group_vars/fanout/secrets.yml | 2 ++ ansible/roles/fanout/tasks/fanout_mlnx.yml | 20 ++++------- .../fanout/tasks/mlnx/deploy_pfcwd_fanout.yml | 33 +++++++------------ .../tasks/mlnx/download_copy_pfcwd_fanout.yml | 21 ++++++++++++ ansible/roles/fanout/tasks/mlnx/scp_copy.yml | 6 ++++ 5 files changed, 47 insertions(+), 35 deletions(-) create mode 100644 ansible/roles/fanout/tasks/mlnx/download_copy_pfcwd_fanout.yml create mode 100644 ansible/roles/fanout/tasks/mlnx/scp_copy.yml diff --git a/ansible/group_vars/fanout/secrets.yml b/ansible/group_vars/fanout/secrets.yml index 8434ea5d82c..49a12b8b79a 100644 --- a/ansible/group_vars/fanout/secrets.yml +++ b/ansible/group_vars/fanout/secrets.yml @@ -1,4 +1,6 @@ ansible_ssh_user: user ansible_ssh_pass: password +fanout_mlnx_user: admin +fanout_mlnx_password: admin fanout_sonic_user: admin fanout_sonic_password: password diff --git a/ansible/roles/fanout/tasks/fanout_mlnx.yml b/ansible/roles/fanout/tasks/fanout_mlnx.yml index 4e2de20b6f1..6966614a1d0 100644 --- a/ansible/roles/fanout/tasks/fanout_mlnx.yml +++ b/ansible/roles/fanout/tasks/fanout_mlnx.yml @@ -1,8 +1,8 @@ ############################################################################################## -### playbook to deploy the fanout swtich +### playbook to deploy the fanout switch ### Use this playbook to deploy the VLAN configurations of 
fanout leaf switch in SONiC testbed -### This playbook will run based on hardware flatform. Each fanout switch hardware type has its -### own unique feature and configuration command or format. Unless you use the hardware swtich +### This playbook will run based on hardware platform. Each fanout switch hardware type has its +### own unique feature and configuration command or format. Unless you use the hardware switch ### specified in this playbook, you would need to come up with your own fanout switch deployment ### playbook ################################################################################################ @@ -13,19 +13,13 @@ tags: always - name: prepare fanout switch admin login info - set_fact: ansible_ssh_user={{ fanout_admin_user }} ansible_ssh_pass={{ fanout_admin_password }} peer_hwsku={{device_info['HwSku']}} + set_fact: ansible_ssh_user={{ fanout_mlnx_user }} ansible_ssh_pass={{ fanout_mlnx_password }} peer_hwsku={{device_info['HwSku']}} tags: always -- set_fact: - fanout_root_user: "user" - fanout_root_pass: "password" - tags: deploy,pfcwd_config,check_pfcwd_config - - ########################################################## - # deploy tasks to deploy default configuration on fanout # - ########################################################## +########################################################## +# deploy tasks to deploy default configuration on fanout # +########################################################## - block: - - debug: msg={{ inventory_hostname }} - name: build fanout startup config for fanout mlnx-os-switch action: apswitch template=mlnx_fanout.j2 connection: switch diff --git a/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml index caedf235adf..461d15fd93f 100644 --- a/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml +++ b/ansible/roles/fanout/tasks/mlnx/deploy_pfcwd_fanout.yml @@ -1,17 +1,15 @@ 
############################################################################################## -### sub-playbook to deploy the docker images needed for the pfcwd test to fanout swtich +### sub-playbook to deploy the docker images needed for the pfcwd test to fanout switch ### to run separately: ### ansible-playbook -i lab fanout.yml -l ${FANOUT} --become --tags pfcwd_config -vvvv ### Optionally "-e pfcwd_dockers_url=" can be specified to fetch dockers without ### building them. This is useful to save time or run task in sonic-mgmt docker. ### E.g. -### ansible-playbook -i lab fanout.yml -l ${FANOUT} -e pfcwd_dockers_url=http://arc-build-server/sonic/ --become --tags pfcwd_config -vvvv +### ansible-playbook -i lab fanout.yml -l ${FANOUT} -e pfcwd_dockers_url=http://some-http-server/pfcwd-dockers/ --become --tags pfcwd_config -vvvv ################################################################################################ -- set_fact: - fanout_addr: "{{device_info['mgmtip']}}" - ansible_ssh_user: "{{fanout_root_user}}" - ansible_ssh_pass: "{{fanout_root_pass}}" +- name: Define variables for deployment of pfcwd dockers on fanout + set_fact: pfcwd_dockers: "['roles/test/files/mlnx/docker-tests-pfcgen/pfc_storm.tgz']" fanout_img_path: "/var/opt/tms/images/" @@ -23,29 +21,20 @@ delegate_to: localhost when: pfcwd_dockers_url is not defined -- name: Copy test match and ignore files to switch - copy: - src: "{{ item }}" - dest: "{{fanout_img_path}}" +- name: Copy pfcwd docker images to switch + include: scp_copy.yml + vars: + src: "{{ item }}" + dest: "{{ fanout_img_path }}" with_items: pfcwd_dockers when: pfcwd_dockers_url is not defined - name: Download pre-built pfcwd dockers if path specified - get_url: url={{pfcwd_dockers_url}}{{item | basename}} dest={{fanout_img_path}}/{{item | basename}} + include: download_copy_pfcwd_fanout.yml with_items: pfcwd_dockers + delegate_to: localhost when: pfcwd_dockers_url is defined -- block: - - name: Mount FS to read-write - 
command: mount -o remount, rw / - - - name: Update storage driver for Docker - command: 'sed -i s/\"storage-driver\":\ \"vfs\",/\"storage-driver\":\ \"devicemapper\",\\n\ \ \ \ \"storage-opts\":\ [\\n\ \ \ \ \ \ \ \ \"dm.fs=ext4\"\\n\ \ \ \ ],\/g /opt/tms/bin/docker_config.json' - - always: - - name: Remount FS back to read-only - command: mount -r -o remount / - - name: Load and start dockers action: apswitch template=mlnx_deploy_pfcwd_fanout.j2 connection: switch diff --git a/ansible/roles/fanout/tasks/mlnx/download_copy_pfcwd_fanout.yml b/ansible/roles/fanout/tasks/mlnx/download_copy_pfcwd_fanout.yml new file mode 100644 index 00000000000..f5695a651a4 --- /dev/null +++ b/ansible/roles/fanout/tasks/mlnx/download_copy_pfcwd_fanout.yml @@ -0,0 +1,21 @@ +- block: + + - name: Get timestamp + set_fact: timestamp="{{lookup('pipe','date +%Y%m%d%H%M%S')}}" + + - name: Get temporary filename + set_fact: filename="/tmp/pfcwd_docker_{{ timestamp }}" + + - name: Download pre-built pfcwd docker image + get_url: url={{ pfcwd_dockers_url }}/{{ item | basename }} dest={{ filename }} + + - name: Copy the downloaded pfcwd docker image to switch + include: scp_copy.yml + vars: + src: "{{ filename }}" + dest: "{{ fanout_img_path }}/{{ item | basename }}" + + always: + + - name: Remove the downloaded pfcwd docker image + file: path={{ filename }} state=absent diff --git a/ansible/roles/fanout/tasks/mlnx/scp_copy.yml b/ansible/roles/fanout/tasks/mlnx/scp_copy.yml new file mode 100644 index 00000000000..fabbc42eca2 --- /dev/null +++ b/ansible/roles/fanout/tasks/mlnx/scp_copy.yml @@ -0,0 +1,6 @@ +- name: Use scp to copy local file to remote host + command: "{{ scp_cmd }} {{ scp_params }} {{ src }} {{ ansible_ssh_user }}@{{ ansible_host }}:{{ dest }}" + vars: + scp_cmd: "sshpass -p {{ ansible_ssh_pass }} scp" + scp_params: "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + delegate_to: localhost From 0a15d4f255bb95a25baa999f0540dd37ba79916b Mon Sep 17 00:00:00 2001 From: Xin 
Wang Date: Wed, 15 Jan 2020 20:38:57 +0800 Subject: [PATCH 166/218] Fix platform PSU consistency checking issue (#1325) The PSU consistency checking logic assumes that all PSUs are OK. When one of the PSUs is not powered, the test fails unnecessarily. This change fixes this issue and also improves the function check_vendor_specific_psustatus to have better code reuse. Signed-off-by: Xin Wang --- tests/platform/mellanox/check_sysfs.py | 12 +++++++++++- tests/platform/test_platform_info.py | 20 +++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tests/platform/mellanox/check_sysfs.py b/tests/platform/mellanox/check_sysfs.py index 72e9f365d43..eef50fcbcea 100644 --- a/tests/platform/mellanox/check_sysfs.py +++ b/tests/platform/mellanox/check_sysfs.py @@ -5,6 +5,7 @@ """ import logging import json +import re def check_sysfs_broken_symbolinks(dut): @@ -168,8 +169,17 @@ def check_sysfs_psu(dut): from common.mellanox_data import SWITCH_MODELS psu_count = SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["number"] + CMD_PLATFORM_PSUSTATUS = "show platform psustatus" + logging.info("Get PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, dut.hostname)) + psu_status = dut.command(CMD_PLATFORM_PSUSTATUS) + psu_status_lines = psu_status["stdout_lines"][2:] + assert len(psu_status_lines) == psu_count, "PSU status output does not match PSU count" + + psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") for psu_id in range(1, psu_count + 1): - check_psu_status_sysfs_consistency(dut, psu_id, 'OK') + psu_status_line = psu_status_lines[psu_id - 1] + psu_state = psu_line_pattern.match(psu_status_line).group(1) + check_psu_status_sysfs_consistency(dut, psu_id, psu_state) def check_sysfs_qsfp(dut, interfaces): diff --git a/tests/platform/test_platform_info.py b/tests/platform/test_platform_info.py index aca94770410..b4ef57352cd 100644 --- a/tests/platform/test_platform_info.py +++ b/tests/platform/test_platform_info.py @@
-54,7 +54,7 @@ def stop_pmon_sensord_task(ans_host): logging.info("sensord stopped successfully") -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def psu_test_setup_teardown(testbed_devices): """ @summary: Sensord task will print out error msg when detect PSU offline, @@ -100,7 +100,7 @@ def test_show_platform_summary(testbed_devices): "Unexpected output fields, actual=%s, expected=%s" % (str(actual_fields), str(expected_fields)) -def check_vendor_specific_psustatus(dut, psu_status_line): +def check_vendor_specific_psustatus(dut): """ @summary: Vendor specific psu status check """ @@ -109,14 +109,10 @@ def check_vendor_specific_psustatus(dut, psu_status_line): sub_folder_dir = os.path.join(current_file_dir, "mellanox") if sub_folder_dir not in sys.path: sys.path.append(sub_folder_dir) - from check_sysfs import check_psu_status_sysfs_consistency + from check_sysfs import check_sysfs_psu - psu_line_pattern = re.compile(r"PSU\s+(\d)+\s+(OK|NOT OK|NOT PRESENT)") - psu_match = psu_line_pattern.match(psu_status_line) - psu_id = psu_match.group(1) - psu_status = psu_match.group(2) + check_sysfs_psu(dut) - check_psu_status_sysfs_consistency(dut, psu_id, psu_status) def test_show_platform_psustatus(testbed_devices): """ @@ -129,7 +125,8 @@ def test_show_platform_psustatus(testbed_devices): psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") for line in psu_status["stdout_lines"][2:]: assert psu_line_pattern.match(line), "Unexpected PSU status output" - check_vendor_specific_psustatus(ans_host, line) + + check_vendor_specific_psustatus(ans_host) def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller, psu_test_setup_teardown): @@ -190,7 +187,8 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller, ps fields = line.split() if fields[2] != "OK": psu_under_test = fields[1] - check_vendor_specific_psustatus(ans_host, line) + + check_vendor_specific_psustatus(ans_host) assert psu_under_test is 
not None, "No PSU is turned off" logging.info("Turn on PSU %s" % str(psu["psu_id"])) @@ -203,8 +201,8 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller, ps fields = line.split() if fields[1] == psu_under_test: assert fields[2] == "OK", "Unexpected PSU status after turned it on" - check_vendor_specific_psustatus(ans_host, line) + check_vendor_specific_psustatus(ans_host) psu_test_results[psu_under_test] = True for psu in psu_test_results: From b9024d6f7ff2015ac749a6c47fa67994ac3f28e3 Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Fri, 17 Jan 2020 09:01:01 +0200 Subject: [PATCH 167/218] [advanced-reboot] assert on total_disrupt_time (#1308) We missed a bug causing total_disrupt_time to be > 1 sec. This is because the test asserts longest_disrupt_time < 1 sec. However, there might be a lot of small periods of time which are < 1 sec which result in a big total downtime. The fix is to assert also on total_disrupt_time in warm reboot test. For fast reboot the test does not calculate total_disrupt_time because it is assumed the longest_disrupt_time == total_disrupt_time in case of fast reboot. Signed-off-by: Stepan Blyschak --- .../test/files/ptftests/advanced-reboot.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 9cd5c982c2e..69b712cc695 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -858,7 +858,7 @@ def wait_for_ssh_threads(): no_cp_replies = self.extract_no_cpu_replies(upper_replies) if no_routing_stop - no_routing_start > self.limit: - self.fails['dut'].add("Downtime must be less then %s seconds. It was %s" \ + self.fails['dut'].add("Longest downtime period must be less then %s seconds.
It was %s" \ % (self.test_params['reboot_limit_in_seconds'], str(no_routing_stop - no_routing_start))) if no_routing_stop - self.reboot_start > datetime.timedelta(seconds=self.test_params['graceful_limit']): self.fails['dut'].add("%s cycle must be less than graceful limit %s seconds" % (self.reboot_type, self.test_params['graceful_limit'])) @@ -866,6 +866,10 @@ def wait_for_ssh_threads(): self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) if self.reboot_type == 'warm-reboot': + if self.total_disrupt_time > self.limit.total_seconds(): + self.fails['dut'].add("Total downtime period must be less then %s seconds. It was %s" \ + % (str(self.limit), str(self.total_disrupt_time))) + # after the data plane is up, check for routing changes if self.test_params['inboot_oper'] and self.sad_handle: self.check_inboot_sad_status() @@ -921,7 +925,7 @@ def wait_for_ssh_threads(): self.log("-"*50) if no_routing_stop: - self.log("Downtime was %s" % str(no_routing_stop - no_routing_start)) + self.log("Longest downtime period was %s" % str(no_routing_stop - no_routing_start)) reboot_time = "0:00:00" if routing_always else str(no_routing_stop - self.reboot_start) self.log("Reboot time was %s" % reboot_time) self.log("Expected downtime is less then %s" % self.limit) @@ -1203,13 +1207,13 @@ def examine_flow(self, filename = None): self.fails['dut'].add("Sniffer failed to filter any traffic from DUT") self.assertTrue(received_counter, "Sniffer failed to filter any traffic from DUT") self.fails['dut'].clear() + self.disrupts_count = len(self.lost_packets) # Total disrupt counter. 
+ # Find the longest loss with the longest time: + max_disrupt_from_id, (self.max_lost_id, self.max_disrupt_time, self.no_routing_start, self.no_routing_stop) = \ + max(self.lost_packets.items(), key = lambda item:item[1][0:2]) + self.total_disrupt_packets = sum([item[0] for item in self.lost_packets.values()]) + self.total_disrupt_time = sum([item[1] for item in self.lost_packets.values()]) if self.lost_packets: - self.disrupts_count = len(self.lost_packets) # Total disrupt counter. - # Find the longest loss with the longest time: - max_disrupt_from_id, (self.max_lost_id, self.max_disrupt_time, self.no_routing_start, self.no_routing_stop) = \ - max(self.lost_packets.items(), key = lambda item:item[1][0:2]) - self.total_disrupt_packets = sum([item[0] for item in self.lost_packets.values()]) - self.total_disrupt_time = sum([item[1] for item in self.lost_packets.values()]) self.log("Disruptions happen between %s and %s after the reboot." % \ (str(self.disruption_start - self.reboot_start), str(self.disruption_stop - self.reboot_start))) else: From 6e60b310ea41f4e226884c1faba8dd42d3728f8b Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Sun, 19 Jan 2020 15:48:31 -0800 Subject: [PATCH 168/218] Revert "[advanced-reboot] assert on total_disrupt_time (#1308)" (#1346) This reverts commit c13bf808739f84cd368499f7d3920f225f2b39a4. --- .../test/files/ptftests/advanced-reboot.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 69b712cc695..9cd5c982c2e 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -858,7 +858,7 @@ def wait_for_ssh_threads(): no_cp_replies = self.extract_no_cpu_replies(upper_replies) if no_routing_stop - no_routing_start > self.limit: - self.fails['dut'].add("Longest downtime period must be less then %s seconds. 
It was %s" \ + self.fails['dut'].add("Downtime must be less then %s seconds. It was %s" \ % (self.test_params['reboot_limit_in_seconds'], str(no_routing_stop - no_routing_start))) if no_routing_stop - self.reboot_start > datetime.timedelta(seconds=self.test_params['graceful_limit']): self.fails['dut'].add("%s cycle must be less than graceful limit %s seconds" % (self.reboot_type, self.test_params['graceful_limit'])) @@ -866,10 +866,6 @@ def wait_for_ssh_threads(): self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) if self.reboot_type == 'warm-reboot': - if self.total_disrupt_time > self.limit.total_seconds(): - self.fails['dut'].add("Total downtime period must be less then %s seconds. It was %s" \ - % (str(self.limit), str(self.total_disrupt_time))) - # after the data plane is up, check for routing changes if self.test_params['inboot_oper'] and self.sad_handle: self.check_inboot_sad_status() @@ -925,7 +921,7 @@ def wait_for_ssh_threads(): self.log("-"*50) if no_routing_stop: - self.log("Longest downtime period was %s" % str(no_routing_stop - no_routing_start)) + self.log("Downtime was %s" % str(no_routing_stop - no_routing_start)) reboot_time = "0:00:00" if routing_always else str(no_routing_stop - self.reboot_start) self.log("Reboot time was %s" % reboot_time) self.log("Expected downtime is less then %s" % self.limit) @@ -1207,13 +1203,13 @@ def examine_flow(self, filename = None): self.fails['dut'].add("Sniffer failed to filter any traffic from DUT") self.assertTrue(received_counter, "Sniffer failed to filter any traffic from DUT") self.fails['dut'].clear() - self.disrupts_count = len(self.lost_packets) # Total disrupt counter. 
- # Find the longest loss with the longest time: - max_disrupt_from_id, (self.max_lost_id, self.max_disrupt_time, self.no_routing_start, self.no_routing_stop) = \ - max(self.lost_packets.items(), key = lambda item:item[1][0:2]) - self.total_disrupt_packets = sum([item[0] for item in self.lost_packets.values()]) - self.total_disrupt_time = sum([item[1] for item in self.lost_packets.values()]) if self.lost_packets: + self.disrupts_count = len(self.lost_packets) # Total disrupt counter. + # Find the longest loss with the longest time: + max_disrupt_from_id, (self.max_lost_id, self.max_disrupt_time, self.no_routing_start, self.no_routing_stop) = \ + max(self.lost_packets.items(), key = lambda item:item[1][0:2]) + self.total_disrupt_packets = sum([item[0] for item in self.lost_packets.values()]) + self.total_disrupt_time = sum([item[1] for item in self.lost_packets.values()]) self.log("Disruptions happen between %s and %s after the reboot." % \ (str(self.disruption_start - self.reboot_start), str(self.disruption_stop - self.reboot_start))) else: From 346fc34ca867d66dfd6b1090eed3d99864aced3e Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Thu, 23 Jan 2020 20:06:21 +0200 Subject: [PATCH 169/218] [mellanox][pfcwd] ignore next_hop_cnt deprecate message (#1355) --- .../test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages index 00a801add90..f7f3db8b081 100644 --- a/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages +++ b/ansible/roles/test/tasks/pfc_wd/functional_test/ignore_pfc_wd_messages @@ -9,4 +9,5 @@ r, ".* SAI_STATUS_BUFFER_OVERFLOW" r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" r, ".* ERR syncd.*" r, ".* syncd .* ERROR +HOST_INTERFACE" -r, ".* syncd .* 
ERROR SX_API_ROUTER: uc_route_data_p->next_hop_list_p: This Parameter is deprecated and will be removed in the future.*" \ No newline at end of file +r, ".* syncd .* ERROR SX_API_ROUTER: uc_route_data_p->next_hop_list_p: This Parameter is deprecated and will be removed in the future.*" +r, ".* syncd .* ERROR SX_API_ROUTER: uc_route_data_p->next_hop_cnt: This Parameter is deprecated and will be removed in the future.*" From dff7899bf5ab366b0ced9421590ac23aa8d9b9a1 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Thu, 23 Jan 2020 18:14:11 -0800 Subject: [PATCH 170/218] Added required files as symbolic link (#1356) --- ansible/roles/eos/templates/t1-64-lag-clet-spine.j2 | 1 + ansible/roles/eos/templates/t1-64-lag-clet-tor.j2 | 1 + 2 files changed, 2 insertions(+) create mode 120000 ansible/roles/eos/templates/t1-64-lag-clet-spine.j2 create mode 120000 ansible/roles/eos/templates/t1-64-lag-clet-tor.j2 diff --git a/ansible/roles/eos/templates/t1-64-lag-clet-spine.j2 b/ansible/roles/eos/templates/t1-64-lag-clet-spine.j2 new file mode 120000 index 00000000000..b17f81efe10 --- /dev/null +++ b/ansible/roles/eos/templates/t1-64-lag-clet-spine.j2 @@ -0,0 +1 @@ +t1-64-lag-spine.j2 \ No newline at end of file diff --git a/ansible/roles/eos/templates/t1-64-lag-clet-tor.j2 b/ansible/roles/eos/templates/t1-64-lag-clet-tor.j2 new file mode 120000 index 00000000000..48ba5038b47 --- /dev/null +++ b/ansible/roles/eos/templates/t1-64-lag-clet-tor.j2 @@ -0,0 +1 @@ +t1-64-lag-tor.j2 \ No newline at end of file From 943c9db6ac3a6ee8504ee31533f8a7a05d4d5d8c Mon Sep 17 00:00:00 2001 From: Neetha John Date: Sun, 26 Jan 2020 12:35:57 -0800 Subject: [PATCH 171/218] Fix Ansible compatibility issues in warm-reboot sad scripts (#1357) Fix test run start failure caused due to changes made for Ansible upgrade Signed-off-by: Neetha John --- ansible/roles/test/tasks/warm-reboot-sad-bgp.yml | 2 +- 
ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml | 2 +- ansible/roles/test/tasks/warm-reboot-sad-lag.yml | 2 +- ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml b/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml index 0d514761604..4992330c647 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml +++ b/ansible/roles/test/tasks/warm-reboot-sad-bgp.yml @@ -15,7 +15,7 @@ when: (vnet is not defined) or (vnet_pkts is not defined) - name: Warm-reboot test - include_tasks: advanced-reboot.yml + include: advanced-reboot.yml vars: reboot_type: warm-reboot preboot_list: "{{ pre_list }}" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml b/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml index 653a19e41e4..4e56d0b46e9 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml +++ b/ansible/roles/test/tasks/warm-reboot-sad-lag-member.yml @@ -21,7 +21,7 @@ when: (vnet is not defined) or (vnet_pkts is not defined) - name: Warm-reboot test - include_tasks: advanced-reboot.yml + include: advanced-reboot.yml vars: reboot_type: warm-reboot preboot_list: "{{ pre_list }}" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-lag.yml b/ansible/roles/test/tasks/warm-reboot-sad-lag.yml index 24e443d48fb..7633da1b193 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad-lag.yml +++ b/ansible/roles/test/tasks/warm-reboot-sad-lag.yml @@ -15,7 +15,7 @@ when: (vnet is not defined) or (vnet_pkts is not defined) - name: Warm-reboot test - include_tasks: advanced-reboot.yml + include: advanced-reboot.yml vars: reboot_type: warm-reboot preboot_list: "{{ pre_list }}" diff --git a/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml b/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml index 7e2e773a86c..7d570606fb8 100644 --- a/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml +++ 
b/ansible/roles/test/tasks/warm-reboot-sad-vlan-port.yml @@ -15,7 +15,7 @@ when: (vnet is not defined) or (vnet_pkts is not defined) - name: Warm-reboot test - include_tasks: advanced-reboot.yml + include: advanced-reboot.yml vars: reboot_type: warm-reboot preboot_list: "{{ pre_list }}" From 41111edecc9f7c83d0fa974a2fd800dcad4ae996 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Fri, 7 Feb 2020 18:57:57 -0800 Subject: [PATCH 172/218] [deploy server] changes to make server deployment smoother (#1371) - Add required packages. - Add public key before adding docker repo. - Make sure path exists before adding file to it. Signed-off-by: Ying Xie --- ansible/roles/vm_set/tasks/docker.yml | 10 +++++----- ansible/roles/vm_set/tasks/external_port.yml | 6 ++++++ ansible/roles/vm_set/tasks/main.yml | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ansible/roles/vm_set/tasks/docker.yml b/ansible/roles/vm_set/tasks/docker.yml index c6936bf2882..7fc700f2aef 100644 --- a/ansible/roles/vm_set/tasks/docker.yml +++ b/ansible/roles/vm_set/tasks/docker.yml @@ -1,3 +1,8 @@ +- name: Add docker official GPG key + apt_key: url=https://download.docker.com/linux/ubuntu/gpg state=present + become: yes + environment: "{{ proxy_env | default({}) }}" + - name: Add docker repository for 16.04 apt_repository: repo: deb [arch=amd64] https://download.docker.com/linux/ubuntu xenial stable @@ -19,11 +24,6 @@ become: yes when: host_distribution_version.stdout == "18.04" -- name: Add docker official GPG key - shell: "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -" - become: yes - environment: "{{ proxy_env | default({}) }}" - - name: Install docker-ce apt: pkg=docker-ce update_cache=yes become: yes diff --git a/ansible/roles/vm_set/tasks/external_port.yml b/ansible/roles/vm_set/tasks/external_port.yml index 9357b0b6a01..7fdda5f20c3 100644 --- a/ansible/roles/vm_set/tasks/external_port.yml +++ b/ansible/roles/vm_set/tasks/external_port.yml @@ -1,3 +1,9 @@ +- 
name: make sure /etc/network/interfaces.d exists + file: + path: /etc/network/interfaces.d + state: directory + become: yes + - name: setup external interface as trunk port template: src=external_port.j2 dest=/etc/network/interfaces.d/external_port diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 3a66c6d0729..68c649d5002 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ b/ansible/roles/vm_set/tasks/main.yml @@ -58,6 +58,8 @@ apt: pkg={{ item }} update_cache=yes cache_valid_time=86400 become: yes with_items: + - ifupdown + - python - qemu - openvswitch-switch - net-tools From 414c95f1f8783fde1f26a3265ff9ec2827b1b3aa Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Tue, 11 Feb 2020 23:48:03 +0800 Subject: [PATCH 173/218] [platform/test_sfp] backport (#1338) to 201811 branch. (#1378) operate SFP only once for split ports when resetting or setting LPM --- tests/platform/files/getportmap.py | 50 ++++++++++++++++++++++ tests/platform/test_sfp.py | 69 ++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100755 tests/platform/files/getportmap.py diff --git a/tests/platform/files/getportmap.py b/tests/platform/files/getportmap.py new file mode 100755 index 00000000000..de7ea740f94 --- /dev/null +++ b/tests/platform/files/getportmap.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# This script runs on the DUT and is intended to retrieve the portmapping from logical interfaces to physical ones +# The way the port mapping retrieved is exactly the same as what xcvrd does + +import sfputil +import json +import subprocess + +PLATFORM_ROOT_PATH = '/usr/share/sonic/device' +SONIC_CFGGEN_PATH = '/usr/local/bin/sonic-cfggen' +HWSKU_KEY = 'DEVICE_METADATA.localhost.hwsku' +PLATFORM_KEY = 'DEVICE_METADATA.localhost.platform' +PLATFORM_ROOT_DOCKER = "/usr/share/sonic/platform" + +platform_sfputil = sfputil.SfpUtil() + +# Returns platform and HW SKU +def get_hwsku(): + proc = 
subprocess.Popen([SONIC_CFGGEN_PATH, '-d', '-v', HWSKU_KEY], + stdout=subprocess.PIPE, + shell=False, + stderr=subprocess.STDOUT) + stdout = proc.communicate()[0] + proc.wait() + hwsku = stdout.rstrip('\n') + + return hwsku + + +# Returns path to port config file +def get_path_to_port_config_file(): + # Get platform and hwsku + hwsku = get_hwsku() + + # Load platform module from source + platform_path = PLATFORM_ROOT_DOCKER + hwsku_path = "/".join([platform_path, hwsku]) + + port_config_file_path = "/".join([hwsku_path, "port_config.ini"]) + + return port_config_file_path + + +port_config_path = get_path_to_port_config_file() +platform_sfputil.read_porttab_mappings(port_config_path) + +# print the mapping to stdout in json format +print json.dumps(platform_sfputil.logical_to_physical) + +# json will be loaded by sonic-mgmt diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py index d6b057daff7..f9eba6b075f 100644 --- a/tests/platform/test_sfp.py +++ b/tests/platform/test_sfp.py @@ -9,6 +9,7 @@ import os import time import copy +import json import pytest @@ -19,6 +20,15 @@ pytest.mark.disable_loganalyzer # disable automatic loganalyzer ] +ans_host = None +port_mapping = None + + +def teardown_module(): + logging.info("remove script to retrieve port mapping") + file_path = os.path.join('/usr/share/sonic/device', ans_host.facts['platform'], 'plugins/getportmap.py') + ans_host.file(path=file_path, state='absent') + def parse_output(output_lines): """ @summary: For parsing command output. The output lines should have format 'key value'. 
@@ -48,6 +58,38 @@ def parse_eeprom(output_lines): return res +def get_port_map(testbed_devices): + """ + @summary: Get the port mapping info from the DUT + @return: a dictionary containing the port map + """ + global port_mapping + global ans_host + + # we've already retrieve port mapping for the DUT, just return it + if not port_mapping is None: + logging.info("Return the previously retrievd port mapping") + return port_mapping + + # this is the first running + logging.info("Retrieving port mapping from DUT") + # copy the helper to DUT + ans_host = testbed_devices["dut"] + src_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'files/getportmap.py') + dest_path = os.path.join('/usr/share/sonic/device', ans_host.facts['platform'], 'plugins/getportmap.py') + ans_host.copy(src=src_path, dest=dest_path) + + # execute command on the DUT to get portmap + get_portmap_cmd = 'docker exec pmon python /usr/share/sonic/platform/plugins/getportmap.py' + portmap_json_string = ans_host.command(get_portmap_cmd)["stdout"] + + # parse the json + port_mapping = json.loads(portmap_json_string) + assert port_mapping, "Retrieve port mapping from DUT failed" + + return port_mapping + + def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): """ @summary: Check SFP status and configure SFP @@ -75,6 +117,9 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): cmd_xcvr_presence = "show interface transceiver presence" cmd_xcvr_eeprom = "show interface transceiver eeprom" + portmap = get_port_map(testbed_devices) + logging.info("Got portmap {}".format(portmap)) + logging.info("Check output of '%s'" % cmd_sfp_presence) sfp_presence = ans_host.command(cmd_sfp_presence) parsed_presence = parse_output(sfp_presence["stdout_lines"][2:]) @@ -104,7 +149,14 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): assert parsed_eeprom[intf] == "SFP EEPROM detected" logging.info("Test '%s '" % cmd_sfp_reset) + 
tested_physical_ports = set() for intf in conn_graph_facts["device_conn"]: + phy_intf = portmap[intf][0] + if phy_intf in tested_physical_ports: + logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf)) + continue + tested_physical_ports.add(phy_intf) + logging.info("resetting {} physical interface {}".format(intf, phy_intf)) reset_result = ans_host.command("%s %s" % (cmd_sfp_reset, intf)) assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf) time.sleep(5) @@ -150,6 +202,9 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): cmd_sfp_show_lpmode = "sudo sfputil show lpmode" cmd_sfp_set_lpmode = "sudo sfputil lpmode" + portmap = get_port_map(testbed_devices) + logging.info("Got portmap {}".format(portmap)) + logging.info("Check output of '%s'" % cmd_sfp_show_lpmode) lpmode_show = ans_host.command(cmd_sfp_show_lpmode) parsed_lpmode = parse_output(lpmode_show["stdout_lines"][2:]) @@ -159,7 +214,14 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): assert parsed_lpmode[intf].lower() == "on" or parsed_lpmode[intf].lower() == "off", "Unexpected SFP lpmode" logging.info("Try to change SFP lpmode") + tested_physical_ports = set() for intf in conn_graph_facts["device_conn"]: + phy_intf = portmap[intf][0] + if phy_intf in tested_physical_ports: + logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf)) + continue + tested_physical_ports.add(phy_intf) + logging.info("setting {} physical interface {}".format(intf, phy_intf)) new_lpmode = "off" if original_lpmode[intf].lower() == "on" else "on" lpmode_set_result = ans_host.command("%s %s %s" % (cmd_sfp_set_lpmode, new_lpmode, intf)) assert lpmode_set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, new_lpmode, intf) @@ -173,7 +235,14 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): assert parsed_lpmode[intf].lower() == "on" or 
parsed_lpmode[intf].lower() == "off", "Unexpected SFP lpmode" logging.info("Try to change SFP lpmode") + tested_physical_ports = set() for intf in conn_graph_facts["device_conn"]: + phy_intf = portmap[intf][0] + if phy_intf in tested_physical_ports: + logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf)) + continue + tested_physical_ports.add(phy_intf) + logging.info("restoring {} physical interface {}".format(intf, phy_intf)) new_lpmode = original_lpmode[intf].lower() lpmode_set_result = ans_host.command("%s %s %s" % (cmd_sfp_set_lpmode, new_lpmode, intf)) assert lpmode_set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, new_lpmode, intf) From cb6ad18ec46cb63fb6b9a2830f335903329f9121 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Mon, 10 Feb 2020 01:46:08 +0800 Subject: [PATCH 174/218] [test_reboot]Wait a few seconds before checking the reboot-cause (#1367) In the reboot cause test it is assumed that the reboot-cause is available as soon as the critical services ready. However, PR Delay process-reboot-cause service until network connection is stable #4003 introduces an extra amount of seconds and fails the assumption. 
--- tests/platform/test_reboot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index 67c65063fa9..f8012fd3059 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -70,7 +70,7 @@ def check_reboot_cause(dut, reboot_cause_expected): reboot_cause_got = output["stdout"] logging.debug("show reboot-cause returns {}".format(reboot_cause_got)) m = re.search(reboot_cause_expected, reboot_cause_got) - assert m is not None, "got reboot-cause %s after rebooted by %s" % (reboot_cause_got, reboot_cause_expected) + return m is not None def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, reboot_helper=None, reboot_kwargs=None): @@ -125,7 +125,8 @@ def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, r check_critical_services(dut) logging.info("Check reboot cause") - check_reboot_cause(dut, reboot_cause) + assert wait_until(120, 20, check_reboot_cause, dut, reboot_cause), \ + "got reboot-cause failed after rebooted by %s" % reboot_cause if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: logging.info("Further checking skipped for {} test which intends to verify reboot-cause only".format(reboot_type)) From de68bbea744c675cfc9c2426f3e28f4a79ac22e8 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Sun, 16 Feb 2020 20:06:51 +0800 Subject: [PATCH 175/218] Warm reboot is not enabled on Mellanox ACS-MSN2410 on 201811 yet (#1382) On 201811 branch, the Mellanox ACS-MSN2410 switch does not have warm reboot enabled yet. 
Signed-off-by: Xin Wang --- tests/common/mellanox_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index c54a52ba2aa..5959bb714a4 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -104,7 +104,7 @@ "reboot": { "cold_reboot": True, "fast_reboot": True, - "warm_reboot": True + "warm_reboot": False }, "fans": { "number": 4, From e5fdff2e77d3dff4b19abda3481c89d0c3fa5e47 Mon Sep 17 00:00:00 2001 From: lguohan Date: Thu, 5 Mar 2020 11:43:00 -0800 Subject: [PATCH 176/218] [swap_syncd]: Correct privilege for sysctl (#1421) Signed-off-by: Neetha John --- ansible/swap_syncd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/swap_syncd.yml b/ansible/swap_syncd.yml index d460204c952..bfe55a8af1b 100644 --- a/ansible/swap_syncd.yml +++ b/ansible/swap_syncd.yml @@ -44,6 +44,7 @@ ignore_errors: yes - name: Set sysctl RCVBUF parameter for tests + become: true sysctl: name: "net.core.rmem_max" value: 509430500 From 13b80939d51930f119bd5fc9d23008b523a85b54 Mon Sep 17 00:00:00 2001 From: lguohan Date: Thu, 5 Mar 2020 11:43:35 -0800 Subject: [PATCH 177/218] [qos_sai]: Add watermark and DWRR parameters for Arista-7060CX-32S-C32 (#1422) Signed-off-by: Neetha John --- ansible/vars/qos.yml | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index f616176f14f..721036f9037 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -507,6 +507,19 @@ qos_params: q6_num_of_pkts: 140 limit: 80 pkts_num_leak_out: 36 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 36 + lossy_weight: 8 + lossless_weight: 30 hdrm_pool_size: dscps: [3, 4] ecn: 1 @@ -518,6 +531,67 @@ qos_params: pkts_num_trig_pfc: 1095 pkts_num_hdrm_full: 1292 
pkts_num_hdrm_partial: 1165 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 1458 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 36 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 36 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 36 + pkts_num_fill_min: 8 + pkts_num_trig_ingr_drp: 2751 + cell_size: 208 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_leak_out: 36 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 36 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 2751 + pkts_num_fill_egr_min: 8 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 36 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 9887 + pkts_num_fill_egr_min: 8 + cell_size: 208 Celestica-DX010-C32: xoff_1: dscp: 3 From 460f68eecbf3edfe46e51ab1896e81b68f484919 Mon Sep 17 00:00:00 2001 From: lguohan Date: Thu, 5 Mar 2020 11:43:59 -0800 Subject: [PATCH 178/218] [qos_sai]: Fix QOS parameters for S6100 and Arista-7260CX3-D108C8 (#1423) Signed-off-by: Neetha John --- ansible/vars/qos.yml | 87 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index 721036f9037..fc55f305fb0 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -422,6 +422,19 @@ qos_params: q6_num_of_pkts: 140 limit: 80 pkts_num_leak_out: 19 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 19 + lossy_weight: 8 
+ lossless_weight: 30 hdrm_pool_size: dscps: [3, 4] ecn: 1 @@ -433,6 +446,67 @@ qos_params: pkts_num_trig_pfc: 732 pkts_num_hdrm_full: 520 pkts_num_hdrm_partial: 361 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 19 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 1458 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 19 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 19 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 1979 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 19 + pkts_num_fill_min: 8 + pkts_num_trig_ingr_drp: 1979 + cell_size: 208 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_leak_out: 19 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 9887 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 19 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 1458 + pkts_num_trig_ingr_drp: 1979 + pkts_num_fill_egr_min: 8 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 19 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 9887 + pkts_num_fill_egr_min: 8 + cell_size: 208 Arista-7060CX-32S-C32: xoff_1: dscp: 3 @@ -984,6 +1058,19 @@ qos_params: q6_num_of_pkts: 140 limit: 80 pkts_num_leak_out: 0 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 0 + lossy_weight: 8 + lossless_weight: 30 hdrm_pool_size: dscps: [3, 4] ecn: 1 From 19dfdc2099cbe1c40a15d6cfe63983d3206c3df2 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Thu, 5 Mar 2020 20:15:47 -0800 Subject: [PATCH 179/218] [qos_sai]: Fix headroom parameters for Arista-7260cx3-Q64 (#1424) Signed-off-by: Neetha John --- ansible/vars/qos.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index fc55f305fb0..49f0eb5a369 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -916,13 +916,13 @@ qos_params: dscps: [3, 4] ecn: 1 pgs: [3, 4] - src_port_ids: [7, 8, 9, 10, 11, 38, 39, 40, 41, 42] - dst_port_id: 6 + src_port_ids: [6, 7, 8, 9, 10, 38, 39, 40, 41, 42] + dst_port_id: 32 pgs_num: 19 pkts_num_leak_out: 0 - pkts_num_trig_pfc: 1489 + pkts_num_trig_pfc: 1490 pkts_num_hdrm_full: 520 - pkts_num_hdrm_partial: 48 + pkts_num_hdrm_partial: 47 wm_pg_shared_lossless: dscp: 3 ecn: 1 From dcf828810c9c2ed15087d7dc449ed433005e782b Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 10 Mar 2020 09:53:55 -0700 Subject: [PATCH 180/218] Added a knob to disable buffer watermark and dscp-pg mapping test (#1434) Signed-off-by: Neetha John --- ansible/roles/test/tasks/buff_wm.yml | 115 ++++++++++++++++ .../test/tasks/qos_get_max_buff_size.yml | 46 +++---- ansible/roles/test/tasks/qos_get_ports.yml | 2 + ansible/roles/test/tasks/qos_sai.yml | 123 ++---------------- 4 files changed, 149 insertions(+), 137 deletions(-) create mode 100644 ansible/roles/test/tasks/buff_wm.yml diff --git a/ansible/roles/test/tasks/buff_wm.yml b/ansible/roles/test/tasks/buff_wm.yml new file mode 100644 index 00000000000..7f3545580ea --- /dev/null +++ b/ansible/roles/test/tasks/buff_wm.yml @@ -0,0 +1,115 @@ +# Clear all watermarks before each watermark test +# because of the clear on read polling mode +- name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + +# buffer pool watermark test +- include: qos_sai_ptf.yml + vars: + test_name: Ingress buffer pool watermark test, lossless traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossless.dscp}}' + - ecn='{{qp.wm_buf_pool_lossless.ecn}}' + - pg='{{qp.wm_buf_pool_lossless.pg}}' + - queue='' + - dst_port_id='{{dst_port_id}}' + - 
dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_ingr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_pfc}}' + - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' + - buf_pool_roid='{{lossless_ingr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') +- debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + +# Clear all watermarks before each watermark test +# because of the clear on read polling mode +- name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + +# buffer pool watermark test +- include: qos_sai_ptf.yml + vars: + test_name: Egress buffer pool watermark test, lossless traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossless.dscp}}' + - ecn='{{qp.wm_buf_pool_lossless.ecn}}' + - pg='' + - queue='{{qp.wm_buf_pool_lossless.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_egr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_ingr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' + - buf_pool_roid='{{lossless_egr_buf_pool_roid}}' +- debug: + var: out.stdout_lines + +# Clear all watermarks before each watermark test +# because of the clear on read polling mode +- name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + +# buffer pool watermark test +- include: qos_sai_ptf.yml + vars: + test_name: Ingress 
buffer pool watermark test, lossy traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossy.dscp}}' + - ecn='{{qp.wm_buf_pool_lossy.ecn}}' + - pg='{{qp.wm_buf_pool_lossy.pg}}' + - queue='' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_ingr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' + - buf_pool_roid='{{lossy_ingr_buf_pool_roid}}' + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') +- debug: + var: out.stdout_lines + when: minigraph_hwsku is defined and + (minigraph_hwsku != 'Arista-7050-QX-32S') + +# Clear all watermarks before each watermark test +# because of the clear on read polling mode +- name: Toggle watermark polling + shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' + +# buffer pool watermark test +- include: qos_sai_ptf.yml + vars: + test_name: Egress buffer pool watermark test, lossy traffic + test_path: sai_qos_tests.BufferPoolWatermarkTest + test_params: + - dscp='{{qp.wm_buf_pool_lossy.dscp}}' + - ecn='{{qp.wm_buf_pool_lossy.ecn}}' + - pg='' + - queue='{{qp.wm_buf_pool_lossy.queue}}' + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' + - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_egr_min}}' + - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' + - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' + - buf_pool_roid='{{lossy_egr_buf_pool_roid}}' +- debug: + var: out.stdout_lines diff --git a/ansible/roles/test/tasks/qos_get_max_buff_size.yml 
b/ansible/roles/test/tasks/qos_get_max_buff_size.yml index 694aac6c300..74f0190737e 100644 --- a/ansible/roles/test/tasks/qos_get_max_buff_size.yml +++ b/ansible/roles/test/tasks/qos_get_max_buff_size.yml @@ -115,32 +115,34 @@ # Get buffer pool ROID # This is perhaps the only useful section in this yaml play -- set_fact: - buffer_pool_name="{{buffer_pool_id|replace('BUFFER_POOL|','')}}" +- block: + - set_fact: + buffer_pool_name="{{buffer_pool_id|replace('BUFFER_POOL|','')}}" -- name: Get {{buffer_pool_name}} VOID - shell: redis-cli -n 2 HGET COUNTERS_BUFFER_POOL_NAME_MAP "{{buffer_pool_name}}" - register: buffer_pool_void + - name: Get {{buffer_pool_name}} VOID + shell: redis-cli -n 2 HGET COUNTERS_BUFFER_POOL_NAME_MAP "{{buffer_pool_name}}" + register: buffer_pool_void -- fail: - msg: "Unable to get VOID for {{buffer_pool_name}}" - when: buffer_pool_void.stdout == "" + - fail: + msg: "Unable to get VOID for {{buffer_pool_name}}" + when: buffer_pool_void.stdout == "" -- name: Parse buffer pool VOID - set_fact: - buffer_pool_void="{{buffer_pool_void.stdout}}" + - name: Parse buffer pool VOID + set_fact: + buffer_pool_void="{{buffer_pool_void.stdout}}" -- name: Get {{buffer_pool_name}} ROID - shell: redis-cli -n 1 HGET VIDTORID "{{buffer_pool_void}}" - register: buffer_pool_roid + - name: Get {{buffer_pool_name}} ROID + shell: redis-cli -n 1 HGET VIDTORID "{{buffer_pool_void}}" + register: buffer_pool_roid -- fail: - msg: "Unable to get ROID for {{buffer_pool_name}}" - when: buffer_pool_roid.stdout == "" + - fail: + msg: "Unable to get ROID for {{buffer_pool_name}}" + when: buffer_pool_roid.stdout == "" -- name: Parse buffer pool ROID, remove 'oid:' prefix - set_fact: - buffer_pool_roid="{{buffer_pool_roid.stdout|replace('oid:','')}}" + - name: Parse buffer pool ROID, remove 'oid:' prefix + set_fact: + buffer_pool_roid="{{buffer_pool_roid.stdout|replace('oid:','')}}" -- debug: - msg="{{buffer_pool_name}} roid {{buffer_pool_roid}}" + - debug: + 
msg="{{buffer_pool_name}} roid {{buffer_pool_roid}}" + when: not disable_test diff --git a/ansible/roles/test/tasks/qos_get_ports.yml b/ansible/roles/test/tasks/qos_get_ports.yml index 1da26daa384..7e52ce4aaec 100644 --- a/ansible/roles/test/tasks/qos_get_ports.yml +++ b/ansible/roles/test/tasks/qos_get_ports.yml @@ -150,6 +150,7 @@ - name: Set lossless ingress buffer pool ROID set_fact: lossless_ingr_buf_pool_roid: "{{buffer_pool_roid}}" + when: not disable_test # Ingress lossy @@ -171,6 +172,7 @@ - name: Set lossy ingress buffer pool ROID set_fact: lossy_ingr_buf_pool_roid: "{{buffer_pool_roid}}" + when: not disable_test # Egress lossless diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index 6c9d307e773..84b9375a371 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -6,6 +6,9 @@ - include_vars: vars/qos.yml +- set_fact: + disable_test: "{{disable_test | default('true') | bool}}" + - block: - name: Getting minigraph facts minigraph_facts: host={{inventory_hostname}} @@ -394,121 +397,8 @@ - debug: var: out.stdout_lines - # Clear all watermarks before each watermark test - # because of the clear on read polling mode - - name: Toggle watermark polling - shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' - - # buffer pool watermark test - - include: qos_sai_ptf.yml - vars: - test_name: Ingress buffer pool watermark test, lossless traffic - test_path: sai_qos_tests.BufferPoolWatermarkTest - test_params: - - dscp='{{qp.wm_buf_pool_lossless.dscp}}' - - ecn='{{qp.wm_buf_pool_lossless.ecn}}' - - pg='{{qp.wm_buf_pool_lossless.pg}}' - - queue='' - - dst_port_id='{{dst_port_id}}' - - dst_port_ip='{{dst_port_ip}}' - - src_port_id='{{src_port_id}}' - - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' - - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_ingr_min}}' - - 
pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_pfc}}' - - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' - - buf_pool_roid='{{lossless_ingr_buf_pool_roid}}' - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-QX-32S') - - debug: - var: out.stdout_lines - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-QX-32S') - - # Clear all watermarks before each watermark test - # because of the clear on read polling mode - - name: Toggle watermark polling - shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' - - # buffer pool watermark test - - include: qos_sai_ptf.yml - vars: - test_name: Egress buffer pool watermark test, lossless traffic - test_path: sai_qos_tests.BufferPoolWatermarkTest - test_params: - - dscp='{{qp.wm_buf_pool_lossless.dscp}}' - - ecn='{{qp.wm_buf_pool_lossless.ecn}}' - - pg='' - - queue='{{qp.wm_buf_pool_lossless.queue}}' - - dst_port_id='{{dst_port_id}}' - - dst_port_ip='{{dst_port_ip}}' - - src_port_id='{{src_port_id}}' - - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_buf_pool_lossless.pkts_num_leak_out}}' - - pkts_num_fill_min='{{qp.wm_buf_pool_lossless.pkts_num_fill_egr_min}}' - - pkts_num_fill_shared='{{qp.wm_buf_pool_lossless.pkts_num_trig_ingr_drp|int - 1}}' - - cell_size='{{qp.wm_buf_pool_lossless.cell_size}}' - - buf_pool_roid='{{lossless_egr_buf_pool_roid}}' - - debug: - var: out.stdout_lines - - # Clear all watermarks before each watermark test - # because of the clear on read polling mode - - name: Toggle watermark polling - shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' - - # buffer pool watermark test - - include: qos_sai_ptf.yml - vars: - test_name: Ingress buffer pool watermark test, lossy traffic - test_path: sai_qos_tests.BufferPoolWatermarkTest - test_params: - - dscp='{{qp.wm_buf_pool_lossy.dscp}}' - - ecn='{{qp.wm_buf_pool_lossy.ecn}}' - - pg='{{qp.wm_buf_pool_lossy.pg}}' - - 
queue='' - - dst_port_id='{{dst_port_id}}' - - dst_port_ip='{{dst_port_ip}}' - - src_port_id='{{src_port_id}}' - - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' - - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_ingr_min}}' - - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' - - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' - - buf_pool_roid='{{lossy_ingr_buf_pool_roid}}' - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-QX-32S') - - debug: - var: out.stdout_lines - when: minigraph_hwsku is defined and - (minigraph_hwsku != 'Arista-7050-QX-32S') - - # Clear all watermarks before each watermark test - # because of the clear on read polling mode - - name: Toggle watermark polling - shell: bash -c 'counterpoll watermark enable; sleep 20; counterpoll watermark disable' - - # buffer pool watermark test - - include: qos_sai_ptf.yml - vars: - test_name: Egress buffer pool watermark test, lossy traffic - test_path: sai_qos_tests.BufferPoolWatermarkTest - test_params: - - dscp='{{qp.wm_buf_pool_lossy.dscp}}' - - ecn='{{qp.wm_buf_pool_lossy.ecn}}' - - pg='' - - queue='{{qp.wm_buf_pool_lossy.queue}}' - - dst_port_id='{{dst_port_id}}' - - dst_port_ip='{{dst_port_ip}}' - - src_port_id='{{src_port_id}}' - - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_buf_pool_lossy.pkts_num_leak_out}}' - - pkts_num_fill_min='{{qp.wm_buf_pool_lossy.pkts_num_fill_egr_min}}' - - pkts_num_fill_shared='{{qp.wm_buf_pool_lossy.pkts_num_trig_egr_drp|int - 1}}' - - cell_size='{{qp.wm_buf_pool_lossy.cell_size}}' - - buf_pool_roid='{{lossy_egr_buf_pool_roid}}' - - debug: - var: out.stdout_lines + - include: buff_wm.yml + when: not disable_test # DSCP to pg mapping - include: qos_sai_ptf.yml @@ -520,8 +410,11 @@ - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' + when: not disable_test + - debug: var: out.stdout_lines + when: not 
disable_test # Change lossy and lossless scheduler weights - name: Change lossy scheduler weight to {{qp.wrr_chg.lossy_weight}} From 20b747b9b2bd13e4ebc9a141d6ad75716cb28b32 Mon Sep 17 00:00:00 2001 From: roman_savchuk Date: Fri, 6 Mar 2020 14:26:30 -0800 Subject: [PATCH 181/218] [wr_arp] Added ssl support in ferret.py for wr_arp test case (#1415) * Added ssl support in ferret.py for wr_arp test case Added certificate generation for ssl to wr_arp.yml Signed-off-by: SavchukRomanLv * Fixed ansible script for creation ssl cert and key Signed-off-by: SavchukRomanLv --- ansible/roles/test/files/helpers/ferret.py | 8 +++++++- ansible/roles/test/tasks/wr_arp.yml | 16 +++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/files/helpers/ferret.py b/ansible/roles/test/files/helpers/ferret.py index 19e0cccf362..e2d72bc9c36 100644 --- a/ansible/roles/test/files/helpers/ferret.py +++ b/ansible/roles/test/files/helpers/ferret.py @@ -11,6 +11,7 @@ import time import socket import ctypes +import ssl import struct import binascii import itertools @@ -89,10 +90,15 @@ def generate_response(self, response): class RestAPI(object): - PORT = 85 + PORT = 448 def __init__(self, obj, db, src_ip): self.httpd = SocketServer.TCPServer(("", self.PORT), obj) + self.context = ssl.SSLContext(ssl.PROTOCOL_TLS) + self.context.verify_mode = ssl.CERT_NONE + self.context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + self.context.load_cert_chain(certfile="/opt/test.pem", keyfile="/opt/test.key") + self.httpd.socket=self.context.wrap_socket(self.httpd.socket, server_side=True) self.db = db obj.db = db obj.src_ip = src_ip diff --git a/ansible/roles/test/tasks/wr_arp.yml b/ansible/roles/test/tasks/wr_arp.yml index e886769e69b..1212987962e 100644 --- a/ansible/roles/test/tasks/wr_arp.yml +++ b/ansible/roles/test/tasks/wr_arp.yml @@ -77,11 +77,6 @@ become: yes when: "'PortChannel' in route_output.stdout" -- name: Update supervisor configuration - include: 
"roles/test/tasks/common_tasks/update_supervisor.yml" - vars: - supervisor_host: "{{ ptf_host }}" - - name: Copy tests to the PTF container copy: src=roles/test/files/ptftests dest=/root delegate_to: "{{ ptf_host }}" @@ -100,6 +95,17 @@ template: src=vxlan_decap.json.j2 dest=/tmp/vxlan_decap.json delegate_to: "{{ ptf_host }}" +- name: Generate pem and key files for ssl + command: openssl req -new -x509 -keyout test.key -out test.pem -days 365 -nodes -subj "/C=10/ST=Test/L=Test/O=Test/OU=Test/CN=test.com" + args: + chdir: /opt + delegate_to: "{{ ptf_host }}" + +- name: Update supervisor configuration + include_tasks: "roles/test/tasks/common_tasks/update_supervisor.yml" + vars: + supervisor_host: "{{ ptf_host }}" + - name: Dump debug info. DUT ip debug: var=ansible_eth0.ipv4.address From 63e642c9767150b6bacca56c1f89025991024acb Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 10 Mar 2020 09:48:35 -0700 Subject: [PATCH 182/218] Fix ARP issues seen in QOS tests (#1420) * Fix ARP issues seen in QOS tests Signed-off-by: Neetha John --- .../test/files/saitests/sai_qos_tests.py | 227 ++++++++++++------ ansible/roles/test/tasks/qos_sai.yml | 16 +- 2 files changed, 163 insertions(+), 80 deletions(-) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 6b135f8faee..55e8edcd617 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -58,21 +58,69 @@ class ARPpopulate(sai_base_test.ThriftInterfaceDataPlane): + def setUp(self): + sai_base_test.ThriftInterfaceDataPlane.setUp(self) + time.sleep(5) + switch_init(self.client) + + # Parse input parameters + self.router_mac = self.test_params['router_mac'] + self.dst_port_id = int(self.test_params['dst_port_id']) + self.dst_port_ip = self.test_params['dst_port_ip'] + self.dst_port_mac = self.dataplane.get_mac(0, self.dst_port_id) + self.src_port_id = int(self.test_params['src_port_id']) + 
self.src_port_ip = self.test_params['src_port_ip'] + self.src_port_mac = self.dataplane.get_mac(0, self.src_port_id) + self.dst_port_2_id = int(self.test_params['dst_port_2_id']) + self.dst_port_2_ip = self.test_params['dst_port_2_ip'] + self.dst_port_2_mac = self.dataplane.get_mac(0, self.dst_port_2_id) + self.dst_port_3_id = int(self.test_params['dst_port_3_id']) + self.dst_port_3_ip = self.test_params['dst_port_3_ip'] + self.dst_port_3_mac = self.dataplane.get_mac(0, self.dst_port_3_id) + + def tearDown(self): + sai_base_test.ThriftInterfaceDataPlane.tearDown(self) + def runTest(self): - router_mac = self.test_params['router_mac'] - # ARP Populate - index = 0 - for port in ptf_ports(): - arpreq_pkt = simple_arp_packet( - eth_dst='ff:ff:ff:ff:ff:ff', - eth_src=self.dataplane.get_mac(port[0],port[1]), - arp_op=1, - ip_snd='10.0.0.%d' % (index * 2 + 1), - ip_tgt='10.0.0.%d' % (index * 2), - hw_snd=self.dataplane.get_mac(port[0], port[1]), - hw_tgt='00:00:00:00:00:00') - send_packet(self, port[1], arpreq_pkt) - index += 1 + # ARP Populate + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.src_port_mac, + arp_op=1, + ip_snd=self.src_port_ip, + ip_tgt='192.168.0.1', + hw_snd=self.src_port_mac, + hw_tgt='00:00:00:00:00:00') + + send_packet(self, self.src_port_id, arpreq_pkt) + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.dst_port_mac, + arp_op=1, + ip_snd=self.dst_port_ip, + ip_tgt='192.168.0.1', + hw_snd=self.dst_port_mac, + hw_tgt='00:00:00:00:00:00') + send_packet(self, self.dst_port_id, arpreq_pkt) + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.dst_port_2_mac, + arp_op=1, + ip_snd=self.dst_port_2_ip, + ip_tgt='192.168.0.1', + hw_snd=self.dst_port_2_mac, + hw_tgt='00:00:00:00:00:00') + send_packet(self, self.dst_port_2_id, arpreq_pkt) + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.dst_port_3_mac, + arp_op=1, + ip_snd=self.dst_port_3_ip, + 
ip_tgt='192.168.0.1', + hw_snd=self.dst_port_3_mac, + hw_tgt='00:00:00:00:00:00') + send_packet(self, self.dst_port_3_id, arpreq_pkt) + time.sleep(8) class ReleaseAllPorts(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): @@ -736,135 +784,166 @@ def runTest(self): sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id, dst_port_2_id, dst_port_3_id]) class HdrmPoolSizeTest(sai_base_test.ThriftInterfaceDataPlane): - def runTest(self): + def setUp(self): + sai_base_test.ThriftInterfaceDataPlane.setUp(self) time.sleep(5) switch_init(self.client) - # Parse input parameters - dscps = self.test_params['dscps'] - ecn = self.test_params['ecn'] - router_mac = self.test_params['router_mac'] - pgs = [pg + 2 for pg in self.test_params['pgs']] # The pfc counter index starts from index 2 in sai_thrift_read_port_counters - src_port_ids = self.test_params['src_port_ids'] - src_port_ips = self.test_params['src_port_ips'] - print >> sys.stderr, src_port_ips + # Parse input parameters + self.testbed_type = self.test_params['testbed_type'] + self.dscps = self.test_params['dscps'] + self.ecn = self.test_params['ecn'] + self.router_mac = self.test_params['router_mac'] + self.pgs = [pg + 2 for pg in self.test_params['pgs']] # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + self.src_port_ids = self.test_params['src_port_ids'] + self.src_port_ips = self.test_params['src_port_ips'] + print >> sys.stderr, self.src_port_ips sys.stderr.flush() - dst_port_id = self.test_params['dst_port_id'] - dst_port_ip = self.test_params['dst_port_ip'] - pgs_num = self.test_params['pgs_num'] - asic_type = self.test_params['sonic_asic_type'] - pkts_num_leak_out = self.test_params['pkts_num_leak_out'] - pkts_num_trig_pfc = self.test_params['pkts_num_trig_pfc'] - pkts_num_hdrm_full = self.test_params['pkts_num_hdrm_full'] - pkts_num_hdrm_partial = self.test_params['pkts_num_hdrm_partial'] - print >> sys.stderr, ("pkts num: leak_out: %d, trig_pfc: %d, hdrm_full: %d, 
hdrm_partial: %d" % (pkts_num_leak_out, pkts_num_trig_pfc, pkts_num_hdrm_full, pkts_num_hdrm_partial)) + self.dst_port_id = self.test_params['dst_port_id'] + self.dst_port_ip = self.test_params['dst_port_ip'] + self.pgs_num = self.test_params['pgs_num'] + self.asic_type = self.test_params['sonic_asic_type'] + self.pkts_num_leak_out = self.test_params['pkts_num_leak_out'] + self.pkts_num_trig_pfc = self.test_params['pkts_num_trig_pfc'] + self.pkts_num_hdrm_full = self.test_params['pkts_num_hdrm_full'] + self.pkts_num_hdrm_partial = self.test_params['pkts_num_hdrm_partial'] + print >> sys.stderr, ("pkts num: leak_out: %d, trig_pfc: %d, hdrm_full: %d, hdrm_partial: %d" % (self.pkts_num_leak_out, self.pkts_num_trig_pfc, self.pkts_num_hdrm_full, self.pkts_num_hdrm_partial)) sys.stderr.flush() - dst_port_mac = self.dataplane.get_mac(0, dst_port_id) - src_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in src_port_ids] + self.dst_port_mac = self.dataplane.get_mac(0, self.dst_port_id) + self.src_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in self.src_port_ids] + + if self.testbed_type in ['t0', 't0-64', 't0-116']: + # populate ARP + for idx, ptid in enumerate(self.src_port_ids): + + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.src_port_macs[idx], + arp_op=1, + ip_snd=self.src_port_ips[idx], + ip_tgt='192.168.0.1', + hw_snd=self.src_port_macs[idx], + hw_tgt='00:00:00:00:00:00') + send_packet(self, ptid, arpreq_pkt) + arpreq_pkt = simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=self.dst_port_mac, + arp_op=1, + ip_snd=self.dst_port_ip, + ip_tgt='192.168.0.1', + hw_snd=self.dst_port_mac, + hw_tgt='00:00:00:00:00:00') + send_packet(self, self.dst_port_id, arpreq_pkt) + time.sleep(8) + + def tearDown(self): + sai_base_test.ThriftInterfaceDataPlane.tearDown(self) + + def runTest(self): margin = 0 - sidx_dscp_pg_tuples = [(sidx, dscp, pgs[pgidx]) for sidx, sid in enumerate(src_port_ids) for pgidx, dscp in 
enumerate(dscps)] - assert(len(sidx_dscp_pg_tuples) >= pgs_num) + sidx_dscp_pg_tuples = [(sidx, dscp, self.pgs[pgidx]) for sidx, sid in enumerate(self.src_port_ids) for pgidx, dscp in enumerate(self.dscps)] + assert(len(sidx_dscp_pg_tuples) >= self.pgs_num) print >> sys.stderr, sidx_dscp_pg_tuples sys.stderr.flush() # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here - recv_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in src_port_ids] - xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.src_port_ids] + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) # Pause egress of dut xmit port - sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + sai_thrift_port_tx_disable(self.client, self.asic_type, [self.dst_port_id]) try: # send packets to leak out sidx = 0 pkt = simple_tcp_packet(pktlen=64, - eth_dst=router_mac if router_mac != '' else dst_port_mac, - eth_src=src_port_macs[sidx], - ip_src=src_port_ips[sidx], - ip_dst=dst_port_ip, + eth_dst=self.router_mac if self.router_mac != '' else self.dst_port_mac, + eth_src=self.src_port_macs[sidx], + ip_src=self.src_port_ips[sidx], + ip_dst=self.dst_port_ip, ip_ttl=64) - send_packet(self, src_port_ids[sidx], pkt, pkts_num_leak_out) + send_packet(self, self.src_port_ids[sidx], pkt, self.pkts_num_leak_out) # send packets to all pgs to fill the service pool # and trigger PFC on all pgs - for i in range(0, pgs_num): + for i in range(0, self.pgs_num): # Prepare TCP packet data tos = sidx_dscp_pg_tuples[i][1] << 2 - tos |= ecn + tos |= self.ecn ttl = 64 default_packet_length = 64 pkt = simple_tcp_packet(pktlen=default_packet_length, - eth_dst=router_mac if router_mac != '' else dst_port_mac, - 
eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], - ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], - ip_dst=dst_port_ip, + eth_dst=self.router_mac if self.router_mac != '' else self.dst_port_mac, + eth_src=self.src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=self.src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=self.dst_port_ip, ip_tos=tos, ip_ttl=ttl) - send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkts_num_trig_pfc) + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, self.pkts_num_trig_pfc) print >> sys.stderr, "Service pool almost filled" sys.stderr.flush() # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - for i in range(0, pgs_num): + for i in range(0, self.pgs_num): # Prepare TCP packet data tos = sidx_dscp_pg_tuples[i][1] << 2 - tos |= ecn + tos |= self.ecn ttl = 64 default_packet_length = 64 pkt = simple_tcp_packet(pktlen=default_packet_length, - eth_dst=router_mac if router_mac != '' else dst_port_mac, - eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], - ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], - ip_dst=dst_port_ip, + eth_dst=self.router_mac if self.router_mac != '' else self.dst_port_mac, + eth_src=self.src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=self.src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=self.dst_port_ip, ip_tos=tos, ip_ttl=ttl) pkt_cnt = 0 - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) while (recv_counters[sidx_dscp_pg_tuples[i][2]] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][sidx_dscp_pg_tuples[i][2]]) and (pkt_cnt < 10): - send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1) + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1) pkt_cnt += 1 # allow enough time for the dut to sync up the 
counter values in counters_db time.sleep(8) # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) if pkt_cnt == 10: sys.exit("Too many pkts needed to trigger pfc: %d" % (pkt_cnt)) assert(recv_counters[sidx_dscp_pg_tuples[i][2]] > recv_counters_bases[sidx_dscp_pg_tuples[i][0]][sidx_dscp_pg_tuples[i][2]]) - print >> sys.stderr, "%d packets for sid: %d, pg: %d to trigger pfc" % (pkt_cnt, src_port_ids[sidx_dscp_pg_tuples[i][0]], sidx_dscp_pg_tuples[i][2] - 2) + print >> sys.stderr, "%d packets for sid: %d, pg: %d to trigger pfc" % (pkt_cnt, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], sidx_dscp_pg_tuples[i][2] - 2) sys.stderr.flush() print >> sys.stderr, "PFC triggered" sys.stderr.flush() # send packets to all pgs to fill the headroom pool - for i in range(0, pgs_num): + for i in range(0, self.pgs_num): # Prepare TCP packet data tos = sidx_dscp_pg_tuples[i][1] << 2 - tos |= ecn + tos |= self.ecn ttl = 64 default_packet_length = 64 pkt = simple_tcp_packet(pktlen=default_packet_length, - eth_dst=router_mac if router_mac != '' else dst_port_mac, - eth_src=src_port_macs[sidx_dscp_pg_tuples[i][0]], - ip_src=src_port_ips[sidx_dscp_pg_tuples[i][0]], - ip_dst=dst_port_ip, + eth_dst=self.router_mac if self.router_mac != '' else self.dst_port_mac, + eth_src=self.src_port_macs[sidx_dscp_pg_tuples[i][0]], + ip_src=self.src_port_ips[sidx_dscp_pg_tuples[i][0]], + ip_dst=self.dst_port_ip, ip_tos=tos, ip_ttl=ttl) - send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkts_num_hdrm_full if i != pgs_num - 1 else pkts_num_hdrm_partial) + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, self.pkts_num_hdrm_full if i != self.pgs_num - 1 else 
self.pkts_num_hdrm_partial) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) # assert no ingress drop assert(recv_counters[INGRESS_DROP] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][INGRESS_DROP]) @@ -872,24 +951,24 @@ def runTest(self): sys.stderr.flush() # last pg - i = pgs_num - 1 + i = self.pgs_num - 1 # send 1 packet on last pg to trigger ingress drop - send_packet(self, src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1 + 2 * margin) + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1 + 2 * margin) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) # assert ingress drop assert(recv_counters[INGRESS_DROP] > recv_counters_bases[sidx_dscp_pg_tuples[i][0]][INGRESS_DROP]) # assert no egress drop at the dut xmit port - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) assert(xmit_counters[EGRESS_DROP] == xmit_counters_base[EGRESS_DROP]) print >> sys.stderr, "pg hdrm filled" sys.stderr.flush() finally: - sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + sai_thrift_port_tx_enable(self.client, self.asic_type, [self.dst_port_id]) # TODO: remove sai_thrift_clear_all_counters and change to use incremental counter values class DscpEcnSend(sai_base_test.ThriftInterfaceDataPlane): diff 
--git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index 84b9375a371..fc2045b4e52 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -101,14 +101,17 @@ vars: test_name: populate arp on all ports test_path: sai_qos_tests.ARPpopulate - test_params: [] + test_params: + - dst_port_id='{{dst_port_id}}' + - dst_port_ip='{{dst_port_ip}}' + - dst_port_2_id='{{dst_port_2_id}}' + - dst_port_2_ip='{{dst_port_2_ip}}' + - dst_port_3_id='{{dst_port_3_id}}' + - dst_port_3_ip='{{dst_port_3_ip}}' + - src_port_id='{{src_port_id}}' + - src_port_ip='{{src_port_ip}}' when: testbed_type in ['t0', 't0-64', 't0-116'] or arp_entries.stdout.find('incomplete') == -1 - - name: Manually add an ARP entry for dst port - command: ip neigh replace {{dst_port_ip}} lladdr 7c:fe:90:5e:6b:a6 dev {{dut_switch_ports[dst_port_id|int]}} - become: yes - when: testbed_type not in ['t0', 't0-64', 't0-116'] and arp_entries.stdout.find('incomplete') != -1 - # XOFF limit - include: qos_sai_ptf.yml vars: @@ -195,6 +198,7 @@ test_name: headroom pool size ptf test ecn = {{qp.hdrm_pool_size.ecn}} test_path: sai_qos_tests.HdrmPoolSizeTest test_params: + - testbed_type='{{testbed_type}}' - dscps={{qp.hdrm_pool_size.dscps}} - ecn={{qp.hdrm_pool_size.ecn}} - pgs={{qp.hdrm_pool_size.pgs}} From 426971d47e986711a9ffccebceb63b30eaa4518b Mon Sep 17 00:00:00 2001 From: Neetha John Date: Tue, 10 Mar 2020 13:06:50 -0700 Subject: [PATCH 183/218] [qos_sai]: Enable error handling and fix issues due to undefined vars (#1433) (#1441) * Init global vars used for scheduler/port pause/release * Enable error handling and fix issues due to undefined vars Signed-off-by: Neetha John --- ansible/roles/test/files/saitests/switch.py | 4 ++++ ansible/roles/test/tasks/qos_sai.yml | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/files/saitests/switch.py b/ansible/roles/test/files/saitests/switch.py index 
4653fa2a191..33dda65804d 100644 --- a/ansible/roles/test/files/saitests/switch.py +++ b/ansible/roles/test/files/saitests/switch.py @@ -51,6 +51,10 @@ is_bmv2 = ('BMV2_TEST' in os.environ) and (int(os.environ['BMV2_TEST']) == 1) +# constants +STOP_PORT_MAX_RATE = 1 +RELEASE_PORT_MAX_RATE = 0 + def switch_init(client): global switch_inited if switch_inited: diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index fc2045b4e52..ad3d74eb64c 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -50,6 +50,8 @@ - name: Disable Mellanox packet aging shell: python /root/packets_aging.py disable + register: result + failed_when: result.stderr != '' vars: ansible_shell_type: docker ansible_python_interpreter: docker exec -i syncd python @@ -481,7 +483,7 @@ - name: Restore original watermark polling status shell: counterpoll watermark {{watermark_status.stdout}} - when: watermark_status.stdout == "enable" or watermark_status.stdout == "disable" + when: watermark_status is defined and (watermark_status.stdout == "enable" or watermark_status.stdout == "disable") - name: Restore lossy scheduler weight to {{lossy_sched_weight}} command: redis-cli -n 4 HSET "{{lossy_sched_profile}}" weight "{{lossy_sched_weight.stdout}}" @@ -491,6 +493,8 @@ - name: Enable Mellanox packet aging shell: python /root/packets_aging.py enable + register: result + failed_when: result.stderr != '' vars: ansible_shell_type: docker ansible_python_interpreter: docker exec -i syncd python From 85237bddddd066f606a76f7c4ef4b1139737b47d Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Fri, 13 Mar 2020 08:32:23 -0700 Subject: [PATCH 184/218] [wr_arp] change wr_arp.yml according to the ansible version (#1446) Signed-off-by: Ying Xie --- ansible/roles/test/tasks/wr_arp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/wr_arp.yml b/ansible/roles/test/tasks/wr_arp.yml index 1212987962e..47c7c6aa7f5 
100644 --- a/ansible/roles/test/tasks/wr_arp.yml +++ b/ansible/roles/test/tasks/wr_arp.yml @@ -102,7 +102,7 @@ delegate_to: "{{ ptf_host }}" - name: Update supervisor configuration - include_tasks: "roles/test/tasks/common_tasks/update_supervisor.yml" + include: "roles/test/tasks/common_tasks/update_supervisor.yml" vars: supervisor_host: "{{ ptf_host }}" From 7b78231823c6f6edcd0779e9d74cc3eb0dca135f Mon Sep 17 00:00:00 2001 From: Neetha John Date: Thu, 12 Mar 2020 14:43:40 -0700 Subject: [PATCH 185/218] Assign a random port when no ports are selected (#1444) 'select_test_ports' ends up being undefined in some cases when none of the test ports satisfy the condition (test_port_id % 15 == seed % 15) where seed is ansible_date_time['day'] With this change, a random port is selected and assigned to 'select_test_ports' if the above check fails to populate any ports Verified that failures are no longer seen with this change on the device with the particular seed value where it was failing earlier. 
Verified that this change is skipped when the ports are already populated Signed-off-by: Neetha John --- ansible/roles/test/tasks/pfc_wd.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ansible/roles/test/tasks/pfc_wd.yml b/ansible/roles/test/tasks/pfc_wd.yml index 030df4ebdc6..f9cae0fb8e3 100644 --- a/ansible/roles/test/tasks/pfc_wd.yml +++ b/ansible/roles/test/tasks/pfc_wd.yml @@ -107,6 +107,14 @@ with_dict: "{{test_ports}}" when: (item.value.test_port_id | int % 15) == (seed | int % 15) +- set_fact: + random_port: "{{ test_ports.keys()[0] }}" + when: select_test_ports is not defined + +- set_fact: + select_test_ports: "{{ select_test_ports | default({}) | combine({random_port : test_ports[random_port]}) | to_json }}" + when: select_test_ports is not defined + - debug: msg="{{select_test_ports}}" - name: Run default test if user has not specified warm reboot test From 289265fb0b62f1133d6063223d0b2529583ebc9f Mon Sep 17 00:00:00 2001 From: Neetha John Date: Fri, 13 Mar 2020 15:58:45 -0700 Subject: [PATCH 186/218] Add QOS parameters for Force10-S6000 (#1449) Signed-off-by: Neetha John --- ansible/vars/qos.yml | 148 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index 49f0eb5a369..c3d767c2640 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -348,6 +348,154 @@ qos_params: pkts_num_trig_egr_drp: 31322 pkts_num_fill_egr_min: 8 cell_size: 208 + Force10-S6000: + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_dismiss_pfc: 12 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_dismiss_pfc: 12 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 
5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 208 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 208 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_leak_out: 48 + pkts_num_trig_egr_drp: 31322 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + pkts_num_leak_out: 48 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + pkts_num_leak_out: 48 + lossy_weight: 8 + lossless_weight: 30 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 4898 + cell_size: 208 + wm_pg_shared_lossy: + dscp: 1 + ecn: 1 + pg: 0 + pkts_num_leak_out: 48 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 31322 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_leak_out: 48 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_leak_out: 48 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossy: + dscp: 1 + ecn: 1 + queue: 1 + pkts_num_leak_out: 48 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 31322 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_leak_out: 48 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + pkts_num_fill_egr_min: 0 + cell_size: 208 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_leak_out: 48 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 31322 + 
pkts_num_fill_egr_min: 8 + cell_size: 208 Force10-S6100: xoff_1: dscp: 3 From 600e59bbb1c4fc14e2517458973b1f11a2ddb0ef Mon Sep 17 00:00:00 2001 From: Neetha John Date: Fri, 13 Mar 2020 15:58:14 -0700 Subject: [PATCH 187/218] [qos] Fix script to accomodate the per-port buffer settings (#1450) - Buffer settings are now applied per port. Change the script to not match on multiple ports. - PTF portmap file is needed for all sku's to get the correct ptf port to interface mapping. Copy it to the ptf irrespective of sku type Signed-off-by: Neetha John --- ansible/roles/test/tasks/qos_get_ports.yml | 6 +++--- ansible/roles/test/tasks/qos_sai.yml | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/qos_get_ports.yml b/ansible/roles/test/tasks/qos_get_ports.yml index 7e52ce4aaec..409d41f4e68 100644 --- a/ansible/roles/test/tasks/qos_get_ports.yml +++ b/ansible/roles/test/tasks/qos_get_ports.yml @@ -157,7 +157,7 @@ - include: roles/test/tasks/qos_get_max_buff_size.yml vars: target_table: 'BUFFER_PG' - target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_port_name: "{{dut_switch_ports[src_port_id|int]}}" target_pg: '0' target_buffer_profile_type: 'ingress lossy' @@ -179,7 +179,7 @@ - include: roles/test/tasks/qos_get_max_buff_size.yml vars: target_table: 'BUFFER_QUEUE' - target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_port_name: "{{dut_switch_ports[src_port_id|int]}}" target_pg: '3-4' target_buffer_profile_type: 'egress lossless' @@ -196,7 +196,7 @@ - include: roles/test/tasks/qos_get_max_buff_size.yml vars: target_table: 'BUFFER_QUEUE' - target_port_name: "***{{dut_switch_ports[src_port_id|int]}}***" + target_port_name: "{{dut_switch_ports[src_port_id|int]}}" target_pg: '0-2' target_buffer_profile_type: 'egress lossy' diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index ad3d74eb64c..ef938830405 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ 
b/ansible/roles/test/tasks/qos_sai.yml @@ -68,9 +68,6 @@ - name: copy portmap copy: src={{ptf_portmap}} dest=/root delegate_to: "{{ptf_host}}" - when: minigraph_hwsku is defined and - (minigraph_hwsku in mellanox_hwskus or minigraph_hwsku in - ['Arista-7050-QX-32S', 'Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64']) - name: Init PTF base test parameters set_fact: From a1cc3a590327e156c48666fc089015aceda266e0 Mon Sep 17 00:00:00 2001 From: Tamer Ahmed Date: Wed, 8 Apr 2020 14:16:49 -0700 Subject: [PATCH 188/218] [advanced reboot] Add Paramiko module for device connection (#1542) (#1551) Parmiko module provides fallback mechanism to using username/password This is required if we are rebooting into new image using advanced reboot test fixture. signed-off-by: Tamer Ahmed --- .../test/files/ptftests/advanced-reboot.py | 16 +++-- .../test/files/ptftests/device_connection.py | 63 +++++++++++++++++++ ansible/roles/test/files/ptftests/sad_path.py | 49 ++++++--------- .../roles/test/tasks/ptf_runner_reboot.yml | 3 +- 4 files changed, 95 insertions(+), 36 deletions(-) create mode 100644 ansible/roles/test/files/ptftests/device_connection.py diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 9cd5c982c2e..4b34f57ed1b 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -57,12 +57,12 @@ import re from collections import defaultdict import json -import paramiko import Queue import pickle from operator import itemgetter import scapy.all as scapyall import itertools +from device_connection import DeviceConnection from arista import Arista import sad_path as sp @@ -125,6 +125,7 @@ def __init__(self): self.test_params = testutils.test_params_get() self.check_param('verbose', False, required=False) self.check_param('dut_username', '', required=True) + 
self.check_param('dut_password', '', required=True) self.check_param('dut_hostname', '', required=True) self.check_param('reboot_limit_in_seconds', 30, required=False) self.check_param('reboot_type', 'fast-reboot', required=False) @@ -210,6 +211,12 @@ def __init__(self): self.allow_vlan_flooding = bool(self.test_params['allow_vlan_flooding']) + self.dut_connection = DeviceConnection( + self.test_params['dut_hostname'], + self.test_params['dut_username'], + password=self.test_params['dut_password'] + ) + return def read_json(self, name): @@ -404,7 +411,7 @@ def get_sad_info(self): def init_sad_oper(self): if self.sad_oper: self.log("Preboot/Inboot Operations:") - self.sad_handle = sp.SadTest(self.sad_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports) + self.sad_handle = sp.SadTest(self.sad_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.vlan_ports) (self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.sad_handle.setup() self.populate_fail_info(fails) for log in log_info: @@ -473,7 +480,6 @@ def setUp(self): self.reboot_type = self.test_params['reboot_type'] if self.reboot_type not in ['fast-reboot', 'warm-reboot']: raise ValueError('Not supported reboot_type %s' % self.reboot_type) - self.dut_ssh = self.test_params['dut_username'] + '@' + self.test_params['dut_hostname'] self.dut_mac = self.test_params['dut_mac'] # get VM info @@ -499,7 +505,7 @@ def setUp(self): self.from_server_dst_ports = self.portchannel_ports self.log("Test params:") - self.log("DUT ssh: %s" % self.dut_ssh) + self.log("DUT ssh: %s@%s" % (self.test_params['dut_username'], self.test_params['dut_hostname'])) self.log("DUT reboot limit in seconds: %s" % self.limit) self.log("DUT mac address: %s" % self.dut_mac) @@ -989,7 +995,7 @@ def reboot_dut(self): time.sleep(self.reboot_delay) self.log("Rebooting remote side") - stdout, stderr, return_code = 
self.cmd(["ssh", "-oStrictHostKeyChecking=no", self.dut_ssh, "sudo " + self.reboot_type]) + stdout, stderr, return_code = self.dut_connection.execCommand("sudo " + self.reboot_type) if stdout != []: self.log("stdout from %s: %s" % (self.reboot_type, str(stdout))) if stderr != []: diff --git a/ansible/roles/test/files/ptftests/device_connection.py b/ansible/roles/test/files/ptftests/device_connection.py new file mode 100644 index 00000000000..a29ea493b06 --- /dev/null +++ b/ansible/roles/test/files/ptftests/device_connection.py @@ -0,0 +1,63 @@ +import paramiko +import logging +from paramiko.ssh_exception import BadHostKeyException, AuthenticationException, SSHException + +logger = logging.getLogger(__name__) + +DEFAULT_CMD_EXECUTION_TIMEOUT_SEC = 10 + +class DeviceConnection: + ''' + DeviceConnection uses Paramiko module to connect to devices + + Paramiko module uses fallback mechanism where it would first try to use + ssh key and that fails, it will attempt username/password combination + ''' + def __init__(self, hostname, username, password=None): + ''' + Class constructor + + @param hostname: hostname of device to connect to + @param username: username for device connection + @param password: password for device connection + ''' + self.hostname = hostname + self.username = username + self.password = password + + def execCommand(self, cmd, timeout=DEFAULT_CMD_EXECUTION_TIMEOUT_SEC): + ''' + Executes command on remote device + + @param cmd: command to be run on remote device + @param timeout: timeout for command run session + @return: stdout, stderr, value + stdout is a list of lines of the remote stdout gathered during command execution + stderr is a list of lines of the remote stderr gathered during command execution + value: 0 if command execution raised no exception + nonzero if exception is raised + ''' + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + if isinstance(cmd, list): + cmd = ' '.join(cmd) + + stdOut 
= stdErr = [] + retValue = 1 + try: + client.connect(self.hostname, username=self.username, password=self.password, allow_agent=False) + si, so, se = client.exec_command(cmd, timeout=timeout) + stdOut = so.readlines() + stdErr = se.readlines() + retValue = 0 + except SSHException as sshException: + logger.error('SSH Command failed with message: %s' % sshException) + except AuthenticationException as authenticationException: + logger.error('SSH Authentiaction failure with message: %s' % authenticationException) + except BadHostKeyException as badHostKeyException: + logger.error('SSH Authentiaction failure with message: %s' % badHostKeyException) + finally: + client.close() + + return stdOut, stdErr, retValue diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index 8fcb5b7db50..85e61d20e5a 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -1,25 +1,24 @@ import datetime import ipaddress import re -import subprocess import time from arista import Arista +from device_connection import DeviceConnection class SadTest(object): - def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports): self.oper_type = oper_type self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map self.test_args = test_args - self.dut_ssh = dut_ssh self.vlan_ports = vlan_ports self.fails_vm = set() self.fails_dut = set() self.log = [] - self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh, self.vlan_ports) + self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.vlan_ports) def setup(self): self.shandle.sad_setup(is_up=False) @@ -55,6 +54,7 @@ def __init__(self, oper_type, vm_list, 
portchannel_ports, vm_dut_map, test_args, self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map self.test_args = test_args + self.dut_connection = DeviceConnection(test_args['dut_hostname'], test_args['dut_username'], password=test_args['dut_password']) self.vlan_ports = vlan_ports self.vlan_if_port = self.test_args['vlan_if_port'] self.neigh_vms = [] @@ -97,16 +97,6 @@ def extract_oper_info(self, oper_type): else: self.oper_type = oper_type - def cmd(self, cmds): - process = subprocess.Popen(cmds, - shell=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - return_code = process.returncode - - return stdout, stderr, return_code - def select_vm(self): self.vm_list.sort() vm_len = len(self.vm_list) @@ -203,9 +193,8 @@ def retreive_logs(self): class SadOper(SadPath): - def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports): + def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports): super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports) - self.dut_ssh = dut_ssh self.dut_needed = dict() self.lag_members_down = dict() self.neigh_lag_members_down = dict() @@ -335,7 +324,7 @@ def get_bgp_route_cnt(self, is_up=True, v4=True): else: cmd = 'show ipv6 bgp summary | sed \'1,/Neighbor/d;/^$/,$d\' | sed \'s/\s\s*/ /g\' | cut -d\' \' -f 1,10' - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, cmd]) + stdout, stderr, return_code = self.dut_connection.execCommand(cmd) if return_code != 0: self.fails['dut'].add('%s: Failed to retreive BGP route info from DUT' % self.msg_prefix[1 - is_up]) self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) @@ -345,15 +334,15 @@ def get_bgp_route_cnt(self, is_up=True, v4=True): def build_neigh_rt_map(self, neigh_rt_info): # construct neigh to route cnt map self.neigh_rt_map = 
dict() - for line in neigh_rt_info.strip().split('\n'): - key, value = line.split(' ') + for line in neigh_rt_info: + key, value = line.strip().split(' ') self.neigh_rt_map.update({key:value}) def verify_route_cnt(self, rt_incr, is_up=True, v4=True): neigh_rt_info, ret = self.get_bgp_route_cnt(is_up=is_up, v4=v4) if not ret: - for line in neigh_rt_info.strip().split('\n'): - neigh_ip, rt_cnt = line.split(' ') + for line in neigh_rt_info: + neigh_ip, rt_cnt = line.strip().split(' ') exp_cnt = int(self.neigh_rt_map[neigh_ip]) + rt_incr if int(rt_cnt) != exp_cnt: self.fails['dut'].add('%s: Route cnt incorrect for neighbor %s Expected: %d Obtained: %d' % (self.msg_prefix[is_up], neigh_ip, exp_cnt, int(rt_cnt))) @@ -386,7 +375,7 @@ def change_vlan_port_state(self, is_up=True): for intf, port in self.down_vlan_info: if not re.match('Ethernet\d+', intf): continue self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)]) + stdout, stderr, return_code = self.dut_connection.execCommand('sudo config interface %s %s' % (state[is_up], intf)) if return_code != 0: self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf)) self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) @@ -400,9 +389,9 @@ def verify_vlan_port_state(self, state='down', pre_check=True): # extract the admin status pat = re.compile('(\S+\s+){7}%s' % state) for intf, port in self.down_vlan_info: - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces status %s' % intf]) + stdout, stderr, return_code = self.dut_connection.execCommand('show interfaces status %s' % intf) if return_code == 0: - for line in stdout.split('\n'): + for line in stdout: if intf in line: is_match = pat.match(line.strip()) if 
is_match: @@ -426,7 +415,7 @@ def change_bgp_dut_state(self, is_up=True): continue self.log.append('Changing state of BGP peer %s from DUT side to %s' % (self.neigh_bgps[vm][key], state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config bgp %s neighbor %s' % (state[is_up], self.neigh_bgps[vm][key])]) + stdout, stderr, return_code = self.dut_connection.execCommand('sudo config bgp %s neighbor %s' % (state[is_up], self.neigh_bgps[vm][key])) if return_code != 0: self.fails['dut'].add('State change not successful from DUT side for peer %s' % self.neigh_bgps[vm][key]) self.fails['dut'].add('Return code: %d' % return_code) @@ -442,9 +431,9 @@ def verify_bgp_dut_state(self, state='Idle'): if key not in ['v4', 'v6']: continue self.log.append('Verifying if the DUT side BGP peer %s is %s' % (self.neigh_bgps[vm][key], states)) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show ip bgp neighbor %s' % self.neigh_bgps[vm][key]]) + stdout, stderr, return_code = self.dut_connection.execCommand('show ip bgp neighbor %s' % self.neigh_bgps[vm][key]) if return_code == 0: - for line in stdout.split('\n'): + for line in stdout: if 'BGP state' in line: curr_state = re.findall('BGP state = (\w+)', line)[0] bgp_state[vm][key] = (curr_state in states) @@ -507,7 +496,7 @@ def change_dut_lag_state(self, is_up=True): for intf in down_intfs: if not re.match('(PortChannel|Ethernet)\d+', intf): continue self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)]) + stdout, stderr, return_code = self.dut_connection.execCommand('sudo config interface %s %s' % (state[is_up], intf)) if return_code != 0: self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf)) 
self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) @@ -549,9 +538,9 @@ def verify_dut_lag_state(self, pre_check=True): po_list.append(po_name) self.po_neigh_map[po_name] = self.neigh_names[vm] - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces portchannel']) + stdout, stderr, return_code = self.dut_connection.execCommand('show interfaces portchannel') if return_code == 0: - for line in stdout.split('\n'): + for line in stdout: for po_name in po_list: if po_name in line: is_match = pat.match(line) diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 3026bdd3d63..6da5b330762 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -51,7 +51,8 @@ ptf_qlen: 1000 ptf_test_params: - verbose=False - - dut_username=\"{{ ansible_ssh_user }}\" + - dut_username=\"{{ sonicadmin_user }}\" + - dut_password=\"{{ sonicadmin_password }}\" - dut_hostname=\"{{ ansible_host }}\" - reboot_limit_in_seconds={{ reboot_limit }} - reboot_type=\"{{ reboot_type }}\" From e3d1946817cf0e92f5f58e43d60b0405b4eff261 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Wed, 8 Apr 2020 15:12:38 -0700 Subject: [PATCH 189/218] Configure and enable core uploader (#1522) * If core-storage secret key is available, add to /etc/sonic/core_analyzer.rc.json and enable & start core_uploader service If https_proxy is provided, update /etc/sonic/core_analyzer.rc.json. * Check the entire dict path before de-referencing. * Improved regex per comments. * Fixed syntax error. * Add a sample file for newly introduced ansible facts.wq * Removed a redundant empty line. 
Co-authored-by: Ubuntu --- ansible/config_sonic_basedon_testbed.yml | 43 ++++++++++++++++++++ ansible/group_vars/all/corefile_uploader.yml | 7 ++++ 2 files changed, 50 insertions(+) create mode 100644 ansible/group_vars/all/corefile_uploader.yml diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index 2a6ffeb2951..97a92d6720c 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -130,6 +130,49 @@ become: true when: stat_result.stat.exists is defined and stat_result.stat.exists + - name: Init account key and proxy + set_fact: + core_key: "" + core_proxy: "" + + - name: read account key + set_fact: + core_key: "{{ corefile_uploader['azure_sonic_core_storage']['account_key'] }}" + when: corefile_uploader['azure_sonic_core_storage']['account_key'] is defined + + - name: read https proxy + set_fact: + core_proxy: "{{ corefile_uploader['env']['https_proxy'] }}" + when: corefile_uploader['env']['https_proxy'] is defined + + - name: Put secret in core_analyzer.rc.json + lineinfile: + name: /etc/sonic/core_analyzer.rc.json + regexp: '(^.*)account_key' + line: '\1account_key": "{{ core_key }}",' + backrefs: yes + become: true + when: core_key != "" + + - name: Put https-proxy in core_analyzer.rc.json + lineinfile: + name: /etc/sonic/core_analyzer.rc.json + regexp: '(^.*)https_proxy' + line: '\1https_proxy": "{{ core_proxy }}"' + backrefs: yes + become: true + when: core_proxy != "" + + - name: enable core uploader service + become: true + command: systemctl enable core_uploader.service + when: core_key != "" + + - name: start core uploader service + become: true + command: systemctl start core_uploader.service + when: core_key != "" + - name: Replace snmp community string lineinfile: name: /etc/sonic/snmp.yml diff --git a/ansible/group_vars/all/corefile_uploader.yml b/ansible/group_vars/all/corefile_uploader.yml new file mode 100644 index 00000000000..c2c57b86d5d --- /dev/null +++ 
b/ansible/group_vars/all/corefile_uploader.yml @@ -0,0 +1,7 @@ +# Configure core file storage secret key and https-proxy as required +# +#corefile_uploader: +# azure_sonic_core_storage: +# account_key: "Your Secret" +# env: +# https_proxy: "http://10.10.10.10:8000" From d808e133dd8017ad67d2eb1c195c758b448a6e9a Mon Sep 17 00:00:00 2001 From: Tamer Ahmed Date: Thu, 9 Apr 2020 20:19:49 -0700 Subject: [PATCH 190/218] [ansible] Increase DUT reboot command timeout (#1558) 10 sec default timeout for some devices, is not enough to complete the reboot process. This PR increases the time out to reboot task timeout. signed-off-by: Tamer Ahmed --- ansible/roles/test/files/ptftests/advanced-reboot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 4b34f57ed1b..dd01345a2a9 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -995,7 +995,7 @@ def reboot_dut(self): time.sleep(self.reboot_delay) self.log("Rebooting remote side") - stdout, stderr, return_code = self.dut_connection.execCommand("sudo " + self.reboot_type) + stdout, stderr, return_code = self.dut_connection.execCommand("sudo " + self.reboot_type, timeout=self.task_timeout) if stdout != []: self.log("stdout from %s: %s" % (self.reboot_type, str(stdout))) if stderr != []: From 5687d8751aa2c65ea4702c5474215190c08d1b56 Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Mon, 13 Apr 2020 15:38:39 -0700 Subject: [PATCH 191/218] changes to make PR#1561 compatible for 201811 (#1562) --- ansible/roles/test/tasks/qos_sai.yml | 37 +++++++++++++++++----------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index ef938830405..eec83d76c79 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ 
b/ansible/roles/test/tasks/qos_sai.yml @@ -31,16 +31,20 @@ - lldpd - lldp-syncd - - name: Disable bgpd + - name: Ensure BGP Daemon stopped become: yes - lineinfile: dest=/etc/quagga/daemons - regexp=^bgpd=.*$ - line='bgpd=no' - notify: - - Restart Quagga Daemon + supervisorctl: state=stopped name=bgpd vars: ansible_shell_type: docker ansible_python_interpreter: docker exec -i bgp python + + - name: Add iptables rule to drop BGP SYN Packet from peer so that we do not ACK back + shell: "iptables -A INPUT -j DROP -p tcp --destination-port bgp" + become: true + + - name: Add ip6tables rule to drop BGP SYN Packet from peer so that we do not ACK back + shell: "ip6tables -A INPUT -j DROP -p tcp --destination-port bgp" + become: true - meta: flush_handlers @@ -467,17 +471,22 @@ - lldpd - lldp-syncd - - name: Enable bgpd - become: yes - lineinfile: dest=/etc/quagga/daemons - regexp=^bgpd=.*$ - line='bgpd=yes' - notify: - - Restart Quagga Daemon + - name: Remove iptables rule to drop BGP SYN Packet from Peer + shell: "iptables -D INPUT -j DROP -p tcp --destination-port bgp" + become: true + + - name: Remove ip6tables rule to drop BGP SYN Packet from Peer + shell: "ip6tables -D INPUT -j DROP -p tcp --destination-port bgp" + become: true + + - name: Restore BGP daemon and docker + supervisorctl: state=started name=bgpd vars: ansible_shell_type: docker ansible_python_interpreter: docker exec -i bgp python - + notify: + - Restart Quagga Daemon + - name: Restore original watermark polling status shell: counterpoll watermark {{watermark_status.stdout}} when: watermark_status is defined and (watermark_status.stdout == "enable" or watermark_status.stdout == "disable") From dd1b923087ac8c1a4fd0375dbac59a6d3f43d254 Mon Sep 17 00:00:00 2001 From: Nazarii Hnydyn Date: Thu, 7 May 2020 09:15:45 +0300 Subject: [PATCH 192/218] [fast-reboot]: Fix IP range overlapping. 
(#1629) Signed-off-by: Nazarii Hnydyn --- ansible/roles/test/tasks/ptf_runner_reboot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/ptf_runner_reboot.yml b/ansible/roles/test/tasks/ptf_runner_reboot.yml index 6da5b330762..166b7e2758b 100644 --- a/ansible/roles/test/tasks/ptf_runner_reboot.yml +++ b/ansible/roles/test/tasks/ptf_runner_reboot.yml @@ -61,7 +61,7 @@ - ports_file=\"/tmp/ports.json\" - dut_mac='{{ dut_mac }}' - dut_vlan_ip='192.168.0.1' - - default_ip_range='192.168.0.0/16' + - default_ip_range='192.168.100.0/18' - vlan_ip_range='{{ vlan_ip_range }}' - lo_v6_prefix='{{ lo_v6_prefix }}' - arista_vms=\"['{{ vm_hosts | list | join("','") }}']\" From 5e4994fbebe52e84610c0749755c26bf6b0a4578 Mon Sep 17 00:00:00 2001 From: Sujin Kang Date: Wed, 13 May 2020 23:28:02 -0700 Subject: [PATCH 193/218] Fix the platform_info test failure because of callback loader (#1667) --- tests/ansible_host.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ansible_host.py b/tests/ansible_host.py index ff1f8ad79af..f9915ac474a 100644 --- a/tests/ansible_host.py +++ b/tests/ansible_host.py @@ -1,6 +1,10 @@ -from ansible.plugins import callback_loader from ansible.errors import AnsibleError +try: + from ansible.plugins import callback_loader +except ImportError: + from ansible.plugins.loader import callback_loader + def dump_ansible_results(results, stdout_callback='yaml'): cb = callback_loader.get(stdout_callback) return cb._dump_results(results) if cb else results From 3a87025e56a4a80278f5f2fb42f0afc815d4a592 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 18 May 2020 10:23:41 -0700 Subject: [PATCH 194/218] Update Th buffer params for Alpha change (#1676) Signed-off-by: Neetha John --- ansible/vars/qos.yml | 78 ++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index c3d767c2640..d3327783fbe 100644 
--- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -502,28 +502,28 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 1979 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 xoff_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 1979 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 xon_1: dscp: 3 ecn: 1 pg: 3 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 pkts_num_dismiss_pfc: 11 xon_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 pkts_num_dismiss_pfc: 11 ecn_1: dscp: 8 @@ -591,7 +591,7 @@ qos_params: dst_port_id: 24 pgs_num: 10 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 732 + pkts_num_trig_pfc: 1194 pkts_num_hdrm_full: 520 pkts_num_hdrm_partial: 361 wm_pg_shared_lossless: @@ -600,7 +600,7 @@ qos_params: pg: 3 pkts_num_leak_out: 19 pkts_num_fill_min: 6 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 cell_size: 208 wm_pg_shared_lossy: dscp: 8 @@ -615,8 +615,8 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 1979 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 cell_size: 208 wm_q_shared_lossless: dscp: 3 @@ -624,7 +624,7 @@ qos_params: queue: 3 pkts_num_leak_out: 19 pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 1979 + pkts_num_trig_ingr_drp: 7063 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -641,8 +641,8 @@ qos_params: queue: 3 pkts_num_leak_out: 19 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 1979 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 pkts_num_fill_egr_min: 8 cell_size: 208 wm_buf_pool_lossy: @@ -661,28 +661,28 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 xoff_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - 
pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 xon_1: dscp: 3 ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 pkts_num_dismiss_pfc: 11 xon_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 pkts_num_dismiss_pfc: 11 ecn_1: dscp: 8 @@ -750,7 +750,7 @@ qos_params: dst_port_id: 16 pgs_num: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1095 + pkts_num_trig_pfc: 2620 pkts_num_hdrm_full: 1292 pkts_num_hdrm_partial: 1165 wm_pg_shared_lossless: @@ -759,7 +759,7 @@ qos_params: pg: 3 pkts_num_leak_out: 36 pkts_num_fill_min: 6 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 cell_size: 208 wm_pg_shared_lossy: dscp: 8 @@ -774,8 +774,8 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 cell_size: 208 wm_q_shared_lossless: dscp: 3 @@ -783,7 +783,7 @@ qos_params: queue: 3 pkts_num_leak_out: 36 pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_ingr_drp: 7835 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -800,8 +800,8 @@ qos_params: queue: 3 pkts_num_leak_out: 36 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 pkts_num_fill_egr_min: 8 cell_size: 208 wm_buf_pool_lossy: @@ -820,28 +820,28 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 xoff_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 xon_1: dscp: 3 ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 pkts_num_dismiss_pfc: 11 xon_2: dscp: 4 ecn: 1 pg: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 
pkts_num_dismiss_pfc: 11 ecn_1: dscp: 8 @@ -909,7 +909,7 @@ qos_params: dst_port_id: 16 pgs_num: 4 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1095 + pkts_num_trig_pfc: 2620 pkts_num_hdrm_full: 1292 pkts_num_hdrm_partial: 1165 wm_pg_shared_lossless: @@ -918,7 +918,7 @@ qos_params: pg: 3 pkts_num_leak_out: 36 pkts_num_fill_min: 6 - pkts_num_trig_pfc: 1458 + pkts_num_trig_pfc: 6542 cell_size: 208 wm_pg_shared_lossy: dscp: 8 @@ -933,8 +933,8 @@ qos_params: ecn: 1 pg: 3 pkts_num_leak_out: 36 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 cell_size: 208 wm_q_shared_lossless: dscp: 3 @@ -942,7 +942,7 @@ qos_params: queue: 3 pkts_num_leak_out: 36 pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_ingr_drp: 7835 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -959,8 +959,8 @@ qos_params: queue: 3 pkts_num_leak_out: 36 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 1458 - pkts_num_trig_ingr_drp: 2751 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 pkts_num_fill_egr_min: 8 cell_size: 208 wm_buf_pool_lossy: From 486047c404b7206ac35a5c2ac082d9c3d441a290 Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Sat, 30 May 2020 17:52:46 -0700 Subject: [PATCH 195/218] Fix for Issue https://github.com/Azure/sonic-buildimage/pull/4530 (#1708) make sure only syncd is match on pgrep --- ansible/roles/test/tasks/restart_syncd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/restart_syncd.yml b/ansible/roles/test/tasks/restart_syncd.yml index 1f6b48735fe..c47d144cb9a 100644 --- a/ansible/roles/test/tasks/restart_syncd.yml +++ b/ansible/roles/test/tasks/restart_syncd.yml @@ -15,7 +15,7 @@ seconds: 10 - name: Look for syncd process - shell: pgrep "\" -a + shell: pgrep syncd -a -x register: syncd_out ignore_errors: yes From 11717fd2f45b45b9f37e9655360cec1d4967f8b1 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 2 Jun 2020 
11:24:36 -0700 Subject: [PATCH 196/218] [sonic image installer] remove config_db.json before booting into new image (#1722) When minigraph.xml exists, config_db.json can be generated from it. In this case, before booting into a new image, remove the config_db.json from /host/old_config to force the new image to load minigraph. This is needed when nightly testbed is moving from a higher version to a lower version. Signed-off-by: Ying Xie --- ansible/library/reduce_and_add_sonic_images.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ansible/library/reduce_and_add_sonic_images.py b/ansible/library/reduce_and_add_sonic_images.py index 83924a7a9a9..5e2d134f266 100644 --- a/ansible/library/reduce_and_add_sonic_images.py +++ b/ansible/library/reduce_and_add_sonic_images.py @@ -23,6 +23,7 @@ ''' import sys +from os import path from ansible.module_utils.basic import * def exec_command(module, cmd, ignore_error=False, msg="executing command"): @@ -74,6 +75,12 @@ def install_new_sonic_image(module, new_image_url): exec_command(module, cmd="umount /tmp/tmpfs", ignore_error=True) exec_command(module, cmd="rm -rf /tmp/tmpfs", ignore_error=True) + # If sonic device is configured with minigraph, remove config_db.json + # to force next image to load minigraph. + if path.exists("/host/old_config/minigraph.xml"): + exec_command(module, + cmd="rm /host/old_config/config_db.json", + msg="Remove config_db.json in preference of minigraph.xml") def main(): module = AnsibleModule( From f887ff5f53f49ffffe8b9cf803405219b790bfc2 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Tue, 2 Jun 2020 18:54:05 -0700 Subject: [PATCH 197/218] Old SONiC images may not have core_analyzer.rc.json file. (#1729) Hence add a check. Apparently fastreboot test from an old image failed due to this file missing. 
--- ansible/config_sonic_basedon_testbed.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index 97a92d6720c..e075416395b 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -135,15 +135,20 @@ core_key: "" core_proxy: "" + - name: Test if core_analyzer.rc.json exists + stat: + path: /etc/sonic/core_analyzer.rc.json + register: rc_stat + - name: read account key set_fact: core_key: "{{ corefile_uploader['azure_sonic_core_storage']['account_key'] }}" - when: corefile_uploader['azure_sonic_core_storage']['account_key'] is defined + when: rc_stat.stat.exists is defined and rc_stat.stat.exists and corefile_uploader['azure_sonic_core_storage']['account_key'] is defined - name: read https proxy set_fact: core_proxy: "{{ corefile_uploader['env']['https_proxy'] }}" - when: corefile_uploader['env']['https_proxy'] is defined + when: rc_stat.stat.exists is defined and rc_stat.stat.exists and corefile_uploader['env']['https_proxy'] is defined - name: Put secret in core_analyzer.rc.json lineinfile: From 253e225ffccdf4deb632f18aac800d359072d2e4 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Fri, 5 Jun 2020 18:32:30 -0700 Subject: [PATCH 198/218] Qos SAI test restructure (#1682) (#1726) * Qos SAI test restructure Signed-off-by: Neetha John --- ansible/group_vars/sonic/vars | 9 +- ansible/roles/test/tasks/qos_get_ports.yml | 4 + ansible/roles/test/tasks/qos_sai.yml | 146 ++- ansible/vars/qos.yml | 1301 ++++++-------------- 4 files changed, 446 insertions(+), 1014 deletions(-) diff --git a/ansible/group_vars/sonic/vars b/ansible/group_vars/sonic/vars index cbd819c150b..12383f46c76 100644 --- a/ansible/group_vars/sonic/vars +++ b/ansible/group_vars/sonic/vars @@ -4,7 +4,14 @@ sonic_version: "v2" broadcom_hwskus: [ "Force10-S6000", "Accton-AS7712-32X", "Celestica-DX010-C32", "Seastone-DX010", "Celestica-E1031-T48S4"] 
-mellanox_hwskus: [ 'ACS-MSN2700', 'Mellanox-SN2700', 'ACS-MSN2740', 'ACS-MSN2100', 'ACS-MSN2410', 'ACS-MSN2010' ] +broadcom_td2_hwskus: ['Force10-S6000', 'Force10-S6000-Q24S32', 'Arista-7050-QX32', 'Arista-7050-QX-32S'] +broadcom_th_hwskus: ['Force10-S6100', 'Arista-7060CX-32S-C32', 'Arista-7060CX-32S-C32-T1', 'Arista-7060CX-32S-D48C8', 'Celestica-DX010-C32', "Seastone-DX010" ] +broadcom_th2_hwskus: ['Arista-7260CX3-D108C8', 'Arista-7260CX3-C64', 'Arista-7260CX3-Q64'] + +mellanox_spc1_hwskus: [ 'ACS-MSN2700', 'ACS-MSN2740', 'ACS-MSN2100', 'ACS-MSN2410', 'ACS-MSN2010', 'Mellanox-SN2700', 'Mellanox-SN2700-D48C8' ] +mellanox_spc2_hwskus: [ 'ACS-MSN3700', 'ACS-MSN3700C', 'ACS-MSN3800', 'Mellanox-SN3800-D112C8' ] +mellanox_spc3_hwskus: [ 'ACS-MSN4700' ] +mellanox_hwskus: "{{ mellanox_spc1_hwskus + mellanox_spc2_hwskus + mellanox_spc3_hwskus }}" cavium_hwskus: [ "AS7512", "XP-SIM" ] diff --git a/ansible/roles/test/tasks/qos_get_ports.yml b/ansible/roles/test/tasks/qos_get_ports.yml index 409d41f4e68..66bf33e90fe 100644 --- a/ansible/roles/test/tasks/qos_get_ports.yml +++ b/ansible/roles/test/tasks/qos_get_ports.yml @@ -143,6 +143,10 @@ target_pg: '3-4' target_buffer_profile_type: 'ingress lossless' +- name: Set lossless buffer profile + set_fact: + lossless_buffer_profile: "{{ buffer_profile }}" + - name: Set lossless MAX buffer size set_fact: lossless_buffer_max_size: "{{buffer_headroom.stdout|int}}" diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index eec83d76c79..7e550af196e 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -14,12 +14,36 @@ minigraph_facts: host={{inventory_hostname}} become: no + - name: Get ports info. 
+ include: roles/test/tasks/qos_get_ports.yml + + - name: Check if lossless buffer profile is derived + fail: msg="Lossless Buffer profile could not be retreived" + when: lossless_buffer_profile is not defined or minigraph_hwsku is not defined + + - set_fact: + defined_asic_list: ['td2', 'th', 'th2', 'spc1', 'spc2', 'spc3'] + speed_cablelen: "{{ lossless_buffer_profile }}" + + - set_fact: speed_cablelen="{{speed_cablelen | regex_replace('BUFFER_PROFILE\|pg_lossless_(.*)_profile', '\\1')}}" + + - name: Get asic type + set_fact: asic_type="{{ item }}" + when: + - hostvars[inventory_hostname][sonic_asic_type + '_' + item + '_hwskus'] is defined + - minigraph_hwsku in hostvars[inventory_hostname][sonic_asic_type + '_' + item + '_hwskus'] + with_items: "{{ defined_asic_list }}" + + - debug: msg="asic type is {{ asic_type }}, portspeed_cablelen is {{ speed_cablelen }}" + - name: check if the device has configured qos parameters fail: msg="device doesn't have configured qos parameters" - when: minigraph_hwsku is not defined or qos_params[minigraph_hwsku] is not defined + when: qos_params[asic_type] is not defined or qos_params[asic_type][speed_cablelen] is not defined - name: set qos parameters for the device - set_fact: qp={{qos_params[minigraph_hwsku]}} + set_fact: + qp: "{{qos_params[asic_type]}}" + qp_sc: "{{qos_params[asic_type][speed_cablelen]}}" - name: Ensure LLDP Daemon stopped become: yes @@ -48,14 +72,17 @@ - meta: flush_handlers - - block: - - name: Deploy script to DUT/syncd - copy: src=roles/test/files/mlnx/packets_aging.py dest=/root/packets_aging.py + - name: Deploy script to DUT/syncd + copy: src=roles/test/files/mlnx/packets_aging.py dest=/root/packets_aging.py + vars: + ansible_shell_type: docker + ansible_python_interpreter: docker exec -i syncd python + when: minigraph_hwsku is defined and minigraph_hwsku in mellanox_hwskus - - name: Disable Mellanox packet aging - shell: python /root/packets_aging.py disable - register: result - failed_when: 
result.stderr != '' + - name: Disable Mellanox packet aging + shell: python /root/packets_aging.py disable + register: result + failed_when: result.stderr != '' vars: ansible_shell_type: docker ansible_python_interpreter: docker exec -i syncd python @@ -81,9 +108,6 @@ - port_map_file='/root/{{ptf_portmap | basename}}' - sonic_asic_type='{{sonic_asic_type}}' - - name: Get ports info. - include: roles/test/tasks/qos_get_ports.yml - # Unpause all paused port - include: qos_sai_ptf.yml vars: @@ -118,39 +142,39 @@ # XOFF limit - include: qos_sai_ptf.yml vars: - test_name: xoff limit ptf test dscp = {{qp.xoff_1.dscp}}, ecn = {{qp.xoff_1.ecn}} + test_name: xoff limit ptf test dscp = {{qp_sc.xoff_1.dscp}}, ecn = {{qp_sc.xoff_1.ecn}} test_path: sai_qos_tests.PFCtest test_params: - - dscp='{{qp.xoff_1.dscp}}' - - ecn='{{qp.xoff_1.ecn}}' - - pg='{{qp.xoff_1.pg}}' + - dscp='{{qp_sc.xoff_1.dscp}}' + - ecn='{{qp_sc.xoff_1.ecn}}' + - pg='{{qp_sc.xoff_1.pg}}' - buffer_max_size='{{lossless_buffer_max_size|int}}' - queue_max_size='{{lossless_queue_max_size|int}}' - dst_port_id='{{dst_port_id}}' - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.xoff_1.pkts_num_leak_out}}' - - pkts_num_trig_pfc='{{qp.xoff_1.pkts_num_trig_pfc}}' - - pkts_num_trig_ingr_drp='{{qp.xoff_1.pkts_num_trig_ingr_drp}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp_sc.xoff_1.pkts_num_trig_pfc}}' + - pkts_num_trig_ingr_drp='{{qp_sc.xoff_1.pkts_num_trig_ingr_drp}}' - include: qos_sai_ptf.yml vars: - test_name: xoff limit ptf test dscp = {{qp.xoff_2.dscp}}, ecn = {{qp.xoff_2.ecn}} + test_name: xoff limit ptf test dscp = {{qp_sc.xoff_2.dscp}}, ecn = {{qp_sc.xoff_2.ecn}} test_path: sai_qos_tests.PFCtest test_params: - - dscp='{{qp.xoff_2.dscp}}' - - ecn='{{qp.xoff_2.ecn}}' - - pg='{{qp.xoff_2.pg}}' + - dscp='{{qp_sc.xoff_2.dscp}}' + - ecn='{{qp_sc.xoff_2.ecn}}' + - pg='{{qp_sc.xoff_2.pg}}' - 
buffer_max_size='{{lossless_buffer_max_size|int}}' - queue_max_size='{{lossless_queue_max_size|int}}' - dst_port_id='{{dst_port_id}}' - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.xoff_2.pkts_num_leak_out}}' - - pkts_num_trig_pfc='{{qp.xoff_2.pkts_num_trig_pfc}}' - - pkts_num_trig_ingr_drp='{{qp.xoff_2.pkts_num_trig_ingr_drp}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp_sc.xoff_2.pkts_num_trig_pfc}}' + - pkts_num_trig_ingr_drp='{{qp_sc.xoff_2.pkts_num_trig_ingr_drp}}' # XON limit - include: qos_sai_ptf.yml @@ -170,7 +194,7 @@ - dst_port_3_ip='{{dst_port_3_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.xon_1.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_trig_pfc='{{qp.xon_1.pkts_num_trig_pfc}}' - pkts_num_dismiss_pfc='{{qp.xon_1.pkts_num_dismiss_pfc}}' @@ -191,29 +215,29 @@ - dst_port_3_ip='{{dst_port_3_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.xon_2.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_trig_pfc='{{qp.xon_2.pkts_num_trig_pfc}}' - pkts_num_dismiss_pfc='{{qp.xon_2.pkts_num_dismiss_pfc}}' # Headroom pool size - include: qos_sai_ptf.yml vars: - test_name: headroom pool size ptf test ecn = {{qp.hdrm_pool_size.ecn}} + test_name: headroom pool size ptf test ecn = {{qp_sc.hdrm_pool_size.ecn}} test_path: sai_qos_tests.HdrmPoolSizeTest test_params: - testbed_type='{{testbed_type}}' - - dscps={{qp.hdrm_pool_size.dscps}} - - ecn={{qp.hdrm_pool_size.ecn}} - - pgs={{qp.hdrm_pool_size.pgs}} - - src_port_ids={{qp.hdrm_pool_size.src_port_ids}} - - src_port_ips=[{% for pid in qp.hdrm_pool_size.src_port_ids %}{% if not loop.last %}'{{testing_ports_ip[pid|string]}}', {% else %}'{{testing_ports_ip[pid|string]}}'{% endif %}{% endfor %}] - - dst_port_id={{qp.hdrm_pool_size.dst_port_id}} 
- - dst_port_ip='{{testing_ports_ip[qp.hdrm_pool_size.dst_port_id|string]}}' - - pgs_num={{qp.hdrm_pool_size.pgs_num }} - - pkts_num_leak_out={{qp.hdrm_pool_size.pkts_num_leak_out}} - - pkts_num_trig_pfc={{qp.hdrm_pool_size.pkts_num_trig_pfc}} - - pkts_num_hdrm_full={{qp.hdrm_pool_size.pkts_num_hdrm_full}} - - pkts_num_hdrm_partial={{qp.hdrm_pool_size.pkts_num_hdrm_partial}} + - dscps={{qp_sc.hdrm_pool_size.dscps}} + - ecn={{qp_sc.hdrm_pool_size.ecn}} + - pgs={{qp_sc.hdrm_pool_size.pgs}} + - src_port_ids={{qp_sc.hdrm_pool_size.src_port_ids}} + - src_port_ips=[{% for pid in qp_sc.hdrm_pool_size.src_port_ids %}{% if not loop.last %}'{{testing_ports_ip[pid|string]}}', {% else %}'{{testing_ports_ip[pid|string]}}'{% endif %}{% endfor %}] + - dst_port_id={{qp_sc.hdrm_pool_size.dst_port_id}} + - dst_port_ip='{{testing_ports_ip[qp_sc.hdrm_pool_size.dst_port_id|string]}}' + - pgs_num={{qp_sc.hdrm_pool_size.pgs_num }} + - pkts_num_leak_out={{qp_sc.pkts_num_leak_out}} + - pkts_num_trig_pfc={{qp_sc.hdrm_pool_size.pkts_num_trig_pfc}} + - pkts_num_hdrm_full={{qp_sc.hdrm_pool_size.pkts_num_hdrm_full}} + - pkts_num_hdrm_partial={{qp_sc.hdrm_pool_size.pkts_num_hdrm_partial}} when: minigraph_hwsku is defined and minigraph_hwsku in ['Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64'] @@ -234,7 +258,7 @@ - dst_port_2_ip='{{dst_port_2_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.lossy_queue_1.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_trig_egr_drp='{{qp.lossy_queue_1.pkts_num_trig_egr_drp}}' # DSCP to queue mapping @@ -267,7 +291,7 @@ - q5_num_of_pkts='{{qp.wrr.q5_num_of_pkts}}' - q6_num_of_pkts='{{qp.wrr.q6_num_of_pkts}}' - limit='{{qp.wrr.limit}}' - - pkts_num_leak_out='{{qp.wrr.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - debug: var: out.stdout_lines @@ -289,7 +313,7 @@ - dst_port_ip='{{dst_port_ip}}' 
- src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_pg_shared_lossless.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_fill_min='{{qp.wm_pg_shared_lossless.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossless.pkts_num_trig_pfc}}' - cell_size='{{qp.wm_pg_shared_lossless.cell_size}}' @@ -318,7 +342,7 @@ - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_pg_shared_lossy.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_fill_min='{{qp.wm_pg_shared_lossy.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossy.pkts_num_trig_egr_drp|int - 1}}' - cell_size='{{qp.wm_pg_shared_lossy.cell_size}}' @@ -340,17 +364,17 @@ test_name: PG headroom watermark test test_path: sai_qos_tests.PGHeadroomWatermarkTest test_params: - - dscp='{{qp.wm_pg_headroom.dscp}}' - - ecn='{{qp.wm_pg_headroom.ecn}}' - - pg='{{qp.wm_pg_headroom.pg}}' + - dscp='{{qp_sc.wm_pg_headroom.dscp}}' + - ecn='{{qp_sc.wm_pg_headroom.ecn}}' + - pg='{{qp_sc.wm_pg_headroom.pg}}' - dst_port_id='{{dst_port_id}}' - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_pg_headroom.pkts_num_leak_out}}' - - pkts_num_trig_pfc='{{qp.wm_pg_headroom.pkts_num_trig_pfc}}' - - pkts_num_trig_ingr_drp='{{qp.wm_pg_headroom.pkts_num_trig_ingr_drp}}' - - cell_size='{{qp.wm_pg_headroom.cell_size}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' + - pkts_num_trig_pfc='{{qp_sc.wm_pg_headroom.pkts_num_trig_pfc}}' + - pkts_num_trig_ingr_drp='{{qp_sc.wm_pg_headroom.pkts_num_trig_ingr_drp}}' + - cell_size='{{qp_sc.wm_pg_headroom.cell_size}}' - debug: var: out.stdout_lines @@ -365,17 +389,17 @@ test_name: Queue shared watermark test, lossless traffic test_path: sai_qos_tests.QSharedWatermarkTest test_params: - - dscp='{{qp.wm_q_shared_lossless.dscp}}' 
- - ecn='{{qp.wm_q_shared_lossless.ecn}}' - - queue='{{qp.wm_q_shared_lossless.queue}}' + - dscp='{{qp_sc.wm_q_shared_lossless.dscp}}' + - ecn='{{qp_sc.wm_q_shared_lossless.ecn}}' + - queue='{{qp_sc.wm_q_shared_lossless.queue}}' - dst_port_id='{{dst_port_id}}' - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_q_shared_lossless.pkts_num_leak_out}}' - - pkts_num_fill_min='{{qp.wm_q_shared_lossless.pkts_num_fill_min}}' - - pkts_num_trig_drp='{{qp.wm_q_shared_lossless.pkts_num_trig_ingr_drp}}' - - cell_size='{{qp.wm_q_shared_lossless.cell_size}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' + - pkts_num_fill_min='{{qp_sc.wm_q_shared_lossless.pkts_num_fill_min}}' + - pkts_num_trig_drp='{{qp_sc.wm_q_shared_lossless.pkts_num_trig_ingr_drp}}' + - cell_size='{{qp_sc.wm_q_shared_lossless.cell_size}}' - debug: var: out.stdout_lines @@ -397,7 +421,7 @@ - dst_port_ip='{{dst_port_ip}}' - src_port_id='{{src_port_id}}' - src_port_ip='{{src_port_ip}}' - - pkts_num_leak_out='{{qp.wm_q_shared_lossy.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_fill_min='{{qp.wm_q_shared_lossy.pkts_num_fill_min}}' - pkts_num_trig_drp='{{qp.wm_q_shared_lossy.pkts_num_trig_egr_drp}}' - cell_size='{{qp.wm_q_shared_lossy.cell_size}}' @@ -449,7 +473,7 @@ - q5_num_of_pkts='{{qp.wrr_chg.q5_num_of_pkts}}' - q6_num_of_pkts='{{qp.wrr_chg.q6_num_of_pkts}}' - limit='{{qp.wrr_chg.limit}}' - - pkts_num_leak_out='{{qp.wrr_chg.pkts_num_leak_out}}' + - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - debug: var: out.stdout_lines diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index d3327783fbe..b1ab9c39d15 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -17,91 +17,47 @@ # xoff_1 for 50G # xoff_2 for 100G qos_params: - ACS-MSN2700: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - xon_1: - dscp: 3 - ecn: 1 - pg: 3 - xon_2: - dscp: 4 - ecn: 1 - pg: 
4 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 96 - ecn_2: - dscp: 8 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 96 - ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 96 - ecn_4: - dscp: 0 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 96 - lossy_queue: - dscp: 8 - ecn: 1 - pg: 1 - wrr: - ecn: 1 - q0_num_of_pkts: 600 - q1_num_of_pkts: 400 - q3_num_of_pkts: 500 - q4_num_of_pkts: 500 - limit: 80 - Mellanox-SN2700: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 11115 - pkts_num_trig_ingr_drp: 11213 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 11115 - pkts_num_trig_ingr_drp: 11213 + spc1: + 40000_5m: + pkts_num_leak_out: 0 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22115 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22115 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22115 + cell_size: 96 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 22115 + cell_size: 96 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 11115 - pkts_num_dismiss_pfc: 10924 + pkts_num_trig_pfc: 22038 + pkts_num_dismiss_pfc: 21847 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 11115 - pkts_num_dismiss_pfc: 10924 + pkts_num_trig_pfc: 22038 + pkts_num_dismiss_pfc: 21847 ecn_1: dscp: 8 ecn: 0 @@ -130,69 +86,32 @@ qos_params: limit: 182320 min_limit: 0 cell_size: 96 - lossy_queue: + lossy_queue_1: dscp: 8 ecn: 1 - pg: 1 - pkts_num_leak_out: 0 - pkts_num_trig_egr_drp: 48547 - wrr: - ecn: 1 - q0_num_of_pkts: 600 - q1_num_of_pkts: 400 - q3_num_of_pkts: 500 - q4_num_of_pkts: 500 - limit: 80 - pkts_num_leak_out: 0 - ACS-MSN2740: - xoff_1: - dscp: 3 - 
ecn: 1 - pg: 3 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - xon_1: + pg: 0 + pkts_num_trig_egr_drp: 67965 + wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 - xon_2: - dscp: 4 - ecn: 1 - pg: 4 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 22038 cell_size: 96 - ecn_2: - dscp: 8 + wm_pg_shared_lossy: + dscp: 1 ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 96 - ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 + pg: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 67965 cell_size: 96 - ecn_4: - dscp: 0 + wm_q_shared_lossy: + dscp: 1 ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 + queue: 1 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 67965 cell_size: 96 - lossy_queue: - dscp: 8 - ecn: 1 - pg: 1 wrr: ecn: 1 q0_num_of_pkts: 600 @@ -200,33 +119,93 @@ qos_params: q3_num_of_pkts: 500 q4_num_of_pkts: 500 limit: 80 - Arista-7050-QX-32S: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 + td2: + 40000_5m: pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + pkts_num_fill_egr_min: 0 + cell_size: 208 + 40000_300m: pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + xoff_2: + dscp: 4 + ecn: 1 + pg: 
4 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 5164 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4898 + pkts_num_trig_ingr_drp: 5164 + pkts_num_fill_egr_min: 0 + cell_size: 208 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 48 pkts_num_trig_pfc: 4898 pkts_num_dismiss_pfc: 12 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 48 pkts_num_trig_pfc: 4898 pkts_num_dismiss_pfc: 12 ecn_1: @@ -261,7 +240,6 @@ qos_params: dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 48 pkts_num_trig_egr_drp: 31322 wrr: ecn: 1 @@ -273,7 +251,6 @@ qos_params: q5_num_of_pkts: 140 q6_num_of_pkts: 140 limit: 80 - pkts_num_leak_out: 48 wrr_chg: ecn: 1 q0_num_of_pkts: 80 @@ -284,14 +261,12 @@ qos_params: q5_num_of_pkts: 80 q6_num_of_pkts: 80 limit: 80 - pkts_num_leak_out: 48 lossy_weight: 8 lossless_weight: 30 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 48 pkts_num_fill_min: 6 pkts_num_trig_pfc: 4898 cell_size: 208 @@ -299,84 +274,141 @@ qos_params: dscp: 1 ecn: 1 pg: 0 - pkts_num_leak_out: 48 pkts_num_fill_min: 0 pkts_num_trig_egr_drp: 31322 cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 48 - pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 5164 - cell_size: 208 wm_q_shared_lossy: dscp: 1 ecn: 1 queue: 1 - pkts_num_leak_out: 48 pkts_num_fill_min: 8 pkts_num_trig_egr_drp: 31322 cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 48 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 - pkts_num_fill_egr_min: 0 
- cell_size: 208 wm_buf_pool_lossy: dscp: 8 ecn: 1 pg: 0 queue: 0 - pkts_num_leak_out: 48 pkts_num_fill_ingr_min: 0 pkts_num_trig_egr_drp: 31322 pkts_num_fill_egr_min: 8 cell_size: 208 - Force10-S6000: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 + th: + 40000_300m: + pkts_num_leak_out: 19 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [25, 26, 27, 40, 41] + dst_port_id: 24 + pgs_num: 10 + pkts_num_trig_pfc: 1194 + pkts_num_hdrm_full: 520 + pkts_num_hdrm_partial: 361 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 8 + pkts_num_trig_ingr_drp: 7063 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7063 + pkts_num_fill_egr_min: 8 + cell_size: 208 + 100000_300m: + pkts_num_leak_out: 36 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [17, 18] + dst_port_id: 16 + pgs_num: 4 + pkts_num_trig_pfc: 2620 + pkts_num_hdrm_full: 1292 + pkts_num_hdrm_partial: 1165 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_fill_min: 6 + pkts_num_trig_pfc: 6542 + cell_size: 208 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 + cell_size: 208 + wm_q_shared_lossless: + 
dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 8 + pkts_num_trig_ingr_drp: 7835 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 6542 + pkts_num_trig_ingr_drp: 7835 + pkts_num_fill_egr_min: 8 + cell_size: 208 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_dismiss_pfc: 12 + pkts_num_trig_pfc: 6542 + pkts_num_dismiss_pfc: 11 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_dismiss_pfc: 12 + pkts_num_trig_pfc: 6542 + pkts_num_dismiss_pfc: 11 ecn_1: dscp: 8 ecn: 0 @@ -409,8 +441,7 @@ qos_params: dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 48 - pkts_num_trig_egr_drp: 31322 + pkts_num_trig_egr_drp: 9887 wrr: ecn: 1 q0_num_of_pkts: 140 @@ -421,7 +452,6 @@ qos_params: q5_num_of_pkts: 140 q6_num_of_pkts: 140 limit: 80 - pkts_num_leak_out: 48 wrr_chg: ecn: 1 q0_num_of_pkts: 80 @@ -432,99 +462,147 @@ qos_params: q5_num_of_pkts: 80 q6_num_of_pkts: 80 limit: 80 - pkts_num_leak_out: 48 lossy_weight: 8 lossless_weight: 30 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 48 pkts_num_fill_min: 6 - pkts_num_trig_pfc: 4898 + pkts_num_trig_pfc: 6542 cell_size: 208 wm_pg_shared_lossy: - dscp: 1 + dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 48 - pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 31322 - cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 48 - pkts_num_trig_pfc: 4898 - pkts_num_trig_ingr_drp: 5164 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 48 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 5164 + pkts_num_trig_egr_drp: 9887 cell_size: 208 wm_q_shared_lossy: - dscp: 1 + dscp: 8 ecn: 1 - queue: 1 - pkts_num_leak_out: 48 + queue: 0 pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 31322 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 48 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 
4898 - pkts_num_trig_ingr_drp: 5164 - pkts_num_fill_egr_min: 0 + pkts_num_trig_egr_drp: 9887 cell_size: 208 wm_buf_pool_lossy: dscp: 8 ecn: 1 pg: 0 queue: 0 - pkts_num_leak_out: 48 pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 31322 + pkts_num_trig_egr_drp: 9887 pkts_num_fill_egr_min: 8 cell_size: 208 - Force10-S6100: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7063 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7063 + th2: + 40000_300m: + pkts_num_leak_out: 0 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4978 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4978 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [6, 7, 8, 9, 10, 38, 39, 40, 41, 42] + dst_port_id: 32 + pgs_num: 19 + pkts_num_trig_pfc: 1490 + pkts_num_hdrm_full: 520 + pkts_num_hdrm_partial: 47 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4978 + cell_size: 208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 4978 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 4978 + pkts_num_fill_egr_min: 16 + cell_size: 208 + 50000_300m: + pkts_num_leak_out: 0 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 5140 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 5140 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7] + dst_port_id: 0 + pgs_num: 14 + pkts_num_trig_pfc: 1826 + pkts_num_hdrm_full: 682 + pkts_num_hdrm_partial: 542 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 5140 + cell_size: 
208 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 5140 + cell_size: 208 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 4457 + pkts_num_trig_ingr_drp: 5140 + pkts_num_fill_egr_min: 16 + cell_size: 208 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 + pkts_num_trig_pfc: 4457 + pkts_num_dismiss_pfc: 12 ecn_1: dscp: 8 ecn: 0 @@ -557,8 +635,7 @@ qos_params: dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 19 - pkts_num_trig_egr_drp: 9887 + pkts_num_trig_egr_drp: 10692 wrr: ecn: 1 q0_num_of_pkts: 140 @@ -569,7 +646,6 @@ qos_params: q5_num_of_pkts: 140 q6_num_of_pkts: 140 limit: 80 - pkts_num_leak_out: 19 wrr_chg: ecn: 1 q0_num_of_pkts: 80 @@ -580,713 +656,34 @@ qos_params: q5_num_of_pkts: 80 q6_num_of_pkts: 80 limit: 80 - pkts_num_leak_out: 19 lossy_weight: 8 lossless_weight: 30 - hdrm_pool_size: - dscps: [3, 4] - ecn: 1 - pgs: [3, 4] - src_port_ids: [25, 26, 27, 40, 41] - dst_port_id: 24 - pgs_num: 10 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 1194 - pkts_num_hdrm_full: 520 - pkts_num_hdrm_partial: 361 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 - pkts_num_leak_out: 19 pkts_num_fill_min: 6 - pkts_num_trig_pfc: 6542 + pkts_num_trig_pfc: 4457 cell_size: 208 wm_pg_shared_lossy: dscp: 8 ecn: 1 pg: 0 - pkts_num_leak_out: 19 pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 9887 + pkts_num_trig_egr_drp: 10692 cell_size: 208 - wm_pg_headroom: - dscp: 3 + wm_q_shared_lossy: + dscp: 8 ecn: 1 - pg: 3 - pkts_num_leak_out: 19 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7063 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 19 - pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 7063 - cell_size: 208 - 
wm_q_shared_lossy: - dscp: 8 - ecn: 1 - queue: 0 - pkts_num_leak_out: 19 - pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 9887 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 19 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7063 - pkts_num_fill_egr_min: 8 - cell_size: 208 - wm_buf_pool_lossy: - dscp: 8 - ecn: 1 - pg: 0 - queue: 0 - pkts_num_leak_out: 19 - pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 9887 - pkts_num_fill_egr_min: 8 - cell_size: 208 - Arista-7060CX-32S-C32: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - xon_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 - xon_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_2: - dscp: 8 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_4: - dscp: 0 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - lossy_queue_1: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 36 - pkts_num_trig_egr_drp: 9887 - wrr: - ecn: 1 - q0_num_of_pkts: 140 - q1_num_of_pkts: 140 - q2_num_of_pkts: 140 - q3_num_of_pkts: 150 - q4_num_of_pkts: 150 - q5_num_of_pkts: 140 - q6_num_of_pkts: 140 - limit: 80 - pkts_num_leak_out: 36 - wrr_chg: - ecn: 1 - q0_num_of_pkts: 80 - q1_num_of_pkts: 80 - q2_num_of_pkts: 80 - q3_num_of_pkts: 300 - q4_num_of_pkts: 300 - q5_num_of_pkts: 80 - q6_num_of_pkts: 80 - limit: 80 - pkts_num_leak_out: 36 - lossy_weight: 8 - lossless_weight: 30 - hdrm_pool_size: - dscps: 
[3, 4] - ecn: 1 - pgs: [3, 4] - src_port_ids: [17, 18] - dst_port_id: 16 - pgs_num: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 2620 - pkts_num_hdrm_full: 1292 - pkts_num_hdrm_partial: 1165 - wm_pg_shared_lossless: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_fill_min: 6 - pkts_num_trig_pfc: 6542 - cell_size: 208 - wm_pg_shared_lossy: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 36 - pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 9887 - cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 36 - pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 7835 - cell_size: 208 - wm_q_shared_lossy: - dscp: 8 - ecn: 1 - queue: 0 - pkts_num_leak_out: 36 - pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 9887 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 36 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - pkts_num_fill_egr_min: 8 - cell_size: 208 - wm_buf_pool_lossy: - dscp: 8 - ecn: 1 - pg: 0 - queue: 0 - pkts_num_leak_out: 36 - pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 9887 - pkts_num_fill_egr_min: 8 - cell_size: 208 - Celestica-DX010-C32: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - xon_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 - xon_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_dismiss_pfc: 11 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_2: - dscp: 8 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - 
ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_4: - dscp: 0 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - lossy_queue_1: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 36 - pkts_num_trig_egr_drp: 9887 - wrr: - ecn: 1 - q0_num_of_pkts: 140 - q1_num_of_pkts: 140 - q2_num_of_pkts: 140 - q3_num_of_pkts: 150 - q4_num_of_pkts: 150 - q5_num_of_pkts: 140 - q6_num_of_pkts: 140 - limit: 80 - pkts_num_leak_out: 36 - wrr_chg: - ecn: 1 - q0_num_of_pkts: 80 - q1_num_of_pkts: 80 - q2_num_of_pkts: 80 - q3_num_of_pkts: 300 - q4_num_of_pkts: 300 - q5_num_of_pkts: 80 - q6_num_of_pkts: 80 - limit: 80 - pkts_num_leak_out: 36 - lossy_weight: 8 - lossless_weight: 30 - hdrm_pool_size: - dscps: [3, 4] - ecn: 1 - pgs: [3, 4] - src_port_ids: [17, 18] - dst_port_id: 16 - pgs_num: 4 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 2620 - pkts_num_hdrm_full: 1292 - pkts_num_hdrm_partial: 1165 - wm_pg_shared_lossless: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_fill_min: 6 - pkts_num_trig_pfc: 6542 - cell_size: 208 - wm_pg_shared_lossy: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 36 - pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 9887 - cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 36 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 36 - pkts_num_fill_min: 8 - pkts_num_trig_ingr_drp: 7835 - cell_size: 208 - wm_q_shared_lossy: - dscp: 8 - ecn: 1 - queue: 0 - pkts_num_leak_out: 36 - pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 9887 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 36 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 6542 - pkts_num_trig_ingr_drp: 7835 - pkts_num_fill_egr_min: 8 - cell_size: 208 - wm_buf_pool_lossy: - dscp: 8 - ecn: 1 - pg: 0 - queue: 0 - pkts_num_leak_out: 36 - 
pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 9887 - pkts_num_fill_egr_min: 8 - cell_size: 208 - Arista-7260CX3-Q64: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4978 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4978 - xon_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_dismiss_pfc: 12 - xon_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_dismiss_pfc: 12 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_2: - dscp: 8 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_4: - dscp: 0 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - lossy_queue_1: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 0 - pkts_num_trig_egr_drp: 10692 - wrr: - ecn: 1 - q0_num_of_pkts: 140 - q1_num_of_pkts: 140 - q2_num_of_pkts: 140 - q3_num_of_pkts: 150 - q4_num_of_pkts: 150 - q5_num_of_pkts: 140 - q6_num_of_pkts: 140 - limit: 80 - pkts_num_leak_out: 0 - wrr_chg: - ecn: 1 - q0_num_of_pkts: 80 - q1_num_of_pkts: 80 - q2_num_of_pkts: 80 - q3_num_of_pkts: 300 - q4_num_of_pkts: 300 - q5_num_of_pkts: 80 - q6_num_of_pkts: 80 - limit: 80 - pkts_num_leak_out: 0 - lossy_weight: 8 - lossless_weight: 30 - hdrm_pool_size: - dscps: [3, 4] - ecn: 1 - pgs: [3, 4] - src_port_ids: [6, 7, 8, 9, 10, 38, 39, 40, 41, 42] - dst_port_id: 32 - pgs_num: 19 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 1490 - pkts_num_hdrm_full: 520 - pkts_num_hdrm_partial: 47 - wm_pg_shared_lossless: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_fill_min: 6 - pkts_num_trig_pfc: 4457 - cell_size: 208 - wm_pg_shared_lossy: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 0 - pkts_num_fill_min: 
0 - pkts_num_trig_egr_drp: 10692 - cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4978 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 0 - pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 4978 - cell_size: 208 - wm_q_shared_lossy: - dscp: 8 - ecn: 1 - queue: 0 - pkts_num_leak_out: 0 - pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 10692 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 0 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 4978 - pkts_num_fill_egr_min: 16 - cell_size: 208 - wm_buf_pool_lossy: - dscp: 8 - ecn: 1 - pg: 0 - queue: 0 - pkts_num_leak_out: 0 - pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 10692 - pkts_num_fill_egr_min: 16 - cell_size: 208 - Arista-7260CX3-D108C8: - xoff_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 5140 - xoff_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 5140 - xon_1: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_dismiss_pfc: 12 - xon_2: - dscp: 4 - ecn: 1 - pg: 4 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_dismiss_pfc: 12 - ecn_1: - dscp: 8 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_2: - dscp: 8 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - ecn_3: - dscp: 0 - ecn: 0 - num_of_pkts: 5000 - limit: 182000 - min_limit: 180000 - cell_size: 208 - ecn_4: - dscp: 0 - ecn: 1 - num_of_pkts: 2047 - limit: 182320 - min_limit: 0 - cell_size: 208 - lossy_queue_1: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 0 - pkts_num_trig_egr_drp: 10692 - wrr: - ecn: 1 - q0_num_of_pkts: 140 - q1_num_of_pkts: 140 - q2_num_of_pkts: 140 - q3_num_of_pkts: 150 - q4_num_of_pkts: 150 - 
q5_num_of_pkts: 140 - q6_num_of_pkts: 140 - limit: 80 - pkts_num_leak_out: 0 - wrr_chg: - ecn: 1 - q0_num_of_pkts: 80 - q1_num_of_pkts: 80 - q2_num_of_pkts: 80 - q3_num_of_pkts: 300 - q4_num_of_pkts: 300 - q5_num_of_pkts: 80 - q6_num_of_pkts: 80 - limit: 80 - pkts_num_leak_out: 0 - lossy_weight: 8 - lossless_weight: 30 - hdrm_pool_size: - dscps: [3, 4] - ecn: 1 - pgs: [3, 4] - src_port_ids: [1, 2, 3, 4, 5, 6, 7] - dst_port_id: 0 - pgs_num: 14 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 1826 - pkts_num_hdrm_full: 682 - pkts_num_hdrm_partial: 542 - wm_pg_shared_lossless: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_fill_min: 6 - pkts_num_trig_pfc: 4457 - cell_size: 208 - wm_pg_shared_lossy: - dscp: 8 - ecn: 1 - pg: 0 - pkts_num_leak_out: 0 - pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 10692 - cell_size: 208 - wm_pg_headroom: - dscp: 3 - ecn: 1 - pg: 3 - pkts_num_leak_out: 0 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 5140 - cell_size: 208 - wm_q_shared_lossless: - dscp: 3 - ecn: 1 - queue: 3 - pkts_num_leak_out: 0 - pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 5140 - cell_size: 208 - wm_q_shared_lossy: - dscp: 8 - ecn: 1 - queue: 0 - pkts_num_leak_out: 0 - pkts_num_fill_min: 8 - pkts_num_trig_egr_drp: 10692 - cell_size: 208 - wm_buf_pool_lossless: - dscp: 3 - ecn: 1 - pg: 3 - queue: 3 - pkts_num_leak_out: 0 - pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 5140 - pkts_num_fill_egr_min: 16 + queue: 0 + pkts_num_fill_min: 8 + pkts_num_trig_egr_drp: 10692 cell_size: 208 wm_buf_pool_lossy: dscp: 8 ecn: 1 pg: 0 queue: 0 - pkts_num_leak_out: 0 pkts_num_fill_ingr_min: 0 pkts_num_trig_egr_drp: 10692 pkts_num_fill_egr_min: 16 From 679dc849b88df2643e903e54be755661d01e0101 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Wed, 27 May 2020 02:03:40 +0800 Subject: [PATCH 199/218] [QoS] Support designating the packet size when testing water mark of shared buffer (#1663) * [qos] 
Support designating the packet size when testing water mark of shared buffer --- .../test/files/saitests/sai_qos_tests.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/ansible/roles/test/files/saitests/sai_qos_tests.py b/ansible/roles/test/files/saitests/sai_qos_tests.py index 55e8edcd617..f6dc7872ad3 100644 --- a/ansible/roles/test/files/saitests/sai_qos_tests.py +++ b/ansible/roles/test/files/saitests/sai_qos_tests.py @@ -1394,12 +1394,17 @@ def runTest(self): pkts_num_fill_min = int(self.test_params['pkts_num_fill_min']) pkts_num_fill_shared = int(self.test_params['pkts_num_fill_shared']) cell_size = int(self.test_params['cell_size']) + if 'packet_size' in self.test_params.keys(): + default_packet_length = int(self.test_params['packet_size']) + else: + default_packet_length = 64 + + cell_occupancy = (default_packet_length + cell_size - 1) / cell_size # Prepare TCP packet data tos = dscp << 2 tos |= ecn ttl = 64 - default_packet_length = 64 pkt = simple_tcp_packet(pktlen=default_packet_length, eth_dst=router_mac if router_mac != '' else dst_port_mac, eth_src=src_port_mac, @@ -1438,13 +1443,16 @@ def runTest(self): # first round sends only 1 packet expected_wm = 0 total_shared = pkts_num_fill_shared - pkts_num_fill_min - pkts_inc = total_shared >> 2 + pkts_inc = (total_shared / cell_occupancy) >> 2 pkts_num = 1 + margin - while (expected_wm < total_shared): - expected_wm += pkts_num + fragment = 0 + while (expected_wm < total_shared - fragment): + expected_wm += pkts_num * cell_occupancy if (expected_wm > total_shared): - pkts_num -= (expected_wm - total_shared) - expected_wm = total_shared + diff = (expected_wm - total_shared + cell_occupancy - 1) / cell_occupancy + pkts_num -= diff + expected_wm -= diff * cell_occupancy + fragment = total_shared - expected_wm print >> sys.stderr, "pkts num to send: %d, total pkts: %d, pg shared: %d" % (pkts_num, expected_wm, total_shared) send_packet(self, src_port_id, pkt, pkts_num) @@ 
-1460,10 +1468,11 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) - print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), pg_shared_wm_res[pg]) - assert(expected_wm == total_shared) + print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, ((expected_wm + cell_occupancy) * cell_size), pg_shared_wm_res[pg]) +# assert(expected_wm == total_shared) + assert(fragment < cell_occupancy) assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) - assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) + assert(pg_shared_wm_res[pg] <= (expected_wm + margin + cell_occupancy) * cell_size) finally: sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) From 0daf720ca5a9b4ab56ef5949260bc8e4e6efceb9 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Sat, 30 May 2020 00:43:02 +0800 Subject: [PATCH 200/218] [Mellanox] Provide qos parameters for packet size and 40G 40m (#1709) --- ansible/vars/qos.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index b1ab9c39d15..9777c65c158 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -39,6 +39,7 @@ qos_params: pkts_num_trig_pfc: 22038 pkts_num_trig_ingr_drp: 22115 cell_size: 96 + packet_size: 300 wm_q_shared_lossless: dscp: 3 ecn: 1 @@ -46,6 +47,35 @@ qos_params: pkts_num_fill_min: 0 pkts_num_trig_ingr_drp: 22115 cell_size: 96 + 40000_40m: + pkts_num_leak_out: 0 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22190 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22190 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + 
pkts_num_trig_pfc: 22038 + pkts_num_trig_ingr_drp: 22190 + cell_size: 96 + packet_size: 300 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 22190 + cell_size: 96 xon_1: dscp: 3 ecn: 1 @@ -105,6 +135,7 @@ qos_params: pkts_num_fill_min: 0 pkts_num_trig_egr_drp: 67965 cell_size: 96 + packet_size: 300 wm_q_shared_lossy: dscp: 1 ecn: 1 From a9d7c2563f70d9bd1100acc390b6728b6862beed Mon Sep 17 00:00:00 2001 From: Neetha John Date: Fri, 29 May 2020 14:51:27 -0700 Subject: [PATCH 201/218] Include pkt size parameter for PG SharedWM tests (#1711) Signed-off-by: Neetha John --- ansible/roles/test/tasks/qos_sai.yml | 2 ++ ansible/vars/qos.yml | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index 7e550af196e..09c4eb212a5 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -316,6 +316,7 @@ - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_fill_min='{{qp.wm_pg_shared_lossless.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossless.pkts_num_trig_pfc}}' + - packet_size='{{qp.wm_pg_shared_lossless.packet_size}}' - cell_size='{{qp.wm_pg_shared_lossless.cell_size}}' when: minigraph_hwsku is defined and (minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8']) @@ -345,6 +346,7 @@ - pkts_num_leak_out='{{qp_sc.pkts_num_leak_out}}' - pkts_num_fill_min='{{qp.wm_pg_shared_lossy.pkts_num_fill_min}}' - pkts_num_fill_shared='{{qp.wm_pg_shared_lossy.pkts_num_trig_egr_drp|int - 1}}' + - packet_size='{{qp.wm_pg_shared_lossy.packet_size}}' - cell_size='{{qp.wm_pg_shared_lossy.cell_size}}' when: minigraph_hwsku is defined and minigraph_hwsku not in ['Arista-7260CX3-Q64', 'Arista-7260CX3-D108C8'] diff --git a/ansible/vars/qos.yml b/ansible/vars/qos.yml index 9777c65c158..32362506fcd 100644 --- a/ansible/vars/qos.yml +++ b/ansible/vars/qos.yml @@ -128,6 +128,7 @@ qos_params: 
pkts_num_fill_min: 6 pkts_num_trig_pfc: 22038 cell_size: 96 + packet_size: 300 wm_pg_shared_lossy: dscp: 1 ecn: 1 @@ -300,6 +301,7 @@ qos_params: pg: 3 pkts_num_fill_min: 6 pkts_num_trig_pfc: 4898 + packet_size: 64 cell_size: 208 wm_pg_shared_lossy: dscp: 1 @@ -307,6 +309,7 @@ qos_params: pg: 0 pkts_num_fill_min: 0 pkts_num_trig_egr_drp: 31322 + packet_size: 64 cell_size: 208 wm_q_shared_lossy: dscp: 1 @@ -501,6 +504,7 @@ qos_params: pg: 3 pkts_num_fill_min: 6 pkts_num_trig_pfc: 6542 + packet_size: 64 cell_size: 208 wm_pg_shared_lossy: dscp: 8 @@ -508,6 +512,7 @@ qos_params: pg: 0 pkts_num_fill_min: 0 pkts_num_trig_egr_drp: 9887 + packet_size: 64 cell_size: 208 wm_q_shared_lossy: dscp: 8 @@ -695,6 +700,7 @@ qos_params: pg: 3 pkts_num_fill_min: 6 pkts_num_trig_pfc: 4457 + packet_size: 64 cell_size: 208 wm_pg_shared_lossy: dscp: 8 @@ -702,6 +708,7 @@ qos_params: pg: 0 pkts_num_fill_min: 0 pkts_num_trig_egr_drp: 10692 + packet_size: 64 cell_size: 208 wm_q_shared_lossy: dscp: 8 From c8eb3233c019f336afec280fd2177295055e47b2 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Wed, 10 Jun 2020 11:40:21 -0700 Subject: [PATCH 202/218] [install image] removing config_db.json with -f option (#1753) If a DUT has already have the target image installed, then there will be no /host/old_config/config_db.json afterward installing. Add -f option to ignore file not exists error. Signed-off-by: Ying Xie --- ansible/library/reduce_and_add_sonic_images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/library/reduce_and_add_sonic_images.py b/ansible/library/reduce_and_add_sonic_images.py index 5e2d134f266..f2bb7e5de6c 100644 --- a/ansible/library/reduce_and_add_sonic_images.py +++ b/ansible/library/reduce_and_add_sonic_images.py @@ -79,7 +79,7 @@ def install_new_sonic_image(module, new_image_url): # to force next image to load minigraph. 
if path.exists("/host/old_config/minigraph.xml"): exec_command(module, - cmd="rm /host/old_config/config_db.json", + cmd="rm -f /host/old_config/config_db.json", msg="Remove config_db.json in preference of minigraph.xml") def main(): From 6d804ef4f959c9980bfa0cf5663698871ccb3fc2 Mon Sep 17 00:00:00 2001 From: Volodymyr Samotiy Date: Tue, 16 Jun 2020 20:52:30 +0300 Subject: [PATCH 203/218] [pfc_wd] Detach pfc_gen.py from terminal when run in background on Mellanox fanout (#1764) Signed-off-by: Volodymyr Samotiy --- ansible/roles/test/templates/pfc_storm_mlnx.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/templates/pfc_storm_mlnx.j2 b/ansible/roles/test/templates/pfc_storm_mlnx.j2 index b7054a02c5c..89902595a0f 100644 --- a/ansible/roles/test/templates/pfc_storm_mlnx.j2 +++ b/ansible/roles/test/templates/pfc_storm_mlnx.j2 @@ -6,9 +6,9 @@ configure terminal docker exec {{ container_name }} /bin/bash cd /root/ {% if (pfc_asym is defined) and (pfc_asym == True) %} -{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} & +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} nohup python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} & {% else %} -{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}} & +{% if pfc_storm_defer_time is defined %} sleep {{pfc_storm_defer_time}} &&{% endif %} nohup python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n 
{{pfc_frames_number}} -i {{pfc_fanout_interface | replace("ernet 1/", "") | replace("/", "_")}} -r {{ansible_eth0_ipv4_addr}} & {% endif %} exit From ad7e36ba85d859a4df0bcc3bf25fdcb6d3a21d8a Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Wed, 24 Jun 2020 14:05:29 -0700 Subject: [PATCH 204/218] Fix Ported for 201811 for master (#1814) PR: https://github.com/Azure/sonic-mgmt/pull/1810/files --- ansible/roles/test/tasks/qos_sai.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/test/tasks/qos_sai.yml b/ansible/roles/test/tasks/qos_sai.yml index 09c4eb212a5..3b0258623a0 100644 --- a/ansible/roles/test/tasks/qos_sai.yml +++ b/ansible/roles/test/tasks/qos_sai.yml @@ -62,12 +62,12 @@ ansible_shell_type: docker ansible_python_interpreter: docker exec -i bgp python - - name: Add iptables rule to drop BGP SYN Packet from peer so that we do not ACK back - shell: "iptables -A INPUT -j DROP -p tcp --destination-port bgp" + - name: Add iptables rule to drop BGP SYN Packet from peer so that we do not ACK back. Add at top so existing rules don't have precedence over it. + shell: "iptables -I INPUT 1 -j DROP -p tcp --destination-port bgp" become: true - - name: Add ip6tables rule to drop BGP SYN Packet from peer so that we do not ACK back - shell: "ip6tables -A INPUT -j DROP -p tcp --destination-port bgp" + - name: Add ip6tables rule to drop BGP SYN Packet from peer so that we do not ACK back. Add at top so existing rules don't have precedence over it. + shell: "ip6tables -I INPUT 1 -j DROP -p tcp --destination-port bgp" become: true - meta: flush_handlers From bf6d8360ce7465224bd20015fe5749c580ac83be Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Tue, 23 Jun 2020 10:37:18 -0700 Subject: [PATCH 205/218] After this PR https://github.com/Azure/sonic-utilities/pull/838 (#1803) there is check pfc_wd poll_time <= pfc_wd_detection/restoration_time. 
So make sure in testscript before setting poll interval stop pfc wd if enable by default because default detection/restoration time can be < poll time interval making script failure. --- ansible/roles/test/tasks/pfc_wd.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/roles/test/tasks/pfc_wd.yml b/ansible/roles/test/tasks/pfc_wd.yml index f9cae0fb8e3..1da3c878c47 100644 --- a/ansible/roles/test/tasks/pfc_wd.yml +++ b/ansible/roles/test/tasks/pfc_wd.yml @@ -144,6 +144,10 @@ pfc_wd_restore_time_large: 3000 pfc_wd_poll_time: 400 + - name: Stop PFC watchdog if enable by default before setting poll interval. + shell: "pfcwd stop" + become: yes + - name: Set polling interval {{ pfc_wd_poll_time }}. shell: "pfcwd interval {{ pfc_wd_poll_time }}" become: yes From 5cb4aaf44fc15f0acba0c5ef30ea1e2b25f87a9c Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Fri, 26 Jun 2020 12:06:00 -0700 Subject: [PATCH 206/218] [service_acl] Wait until SSH is stopped rather than waiting for start to timeout (#1820) With the recent changes to caclmgrd, this check was not consistent, and could potentially fail with the following error: ``` TASK [test : Ensure the SSH port on the DuT becomes closed to us] ******************************************************************************************************************** Friday 26 June 2020 01:17:14 +0000 (0:00:05.323) 0:01:29.829 *********** fatal: [sonic-dut-1]: FAILED! => {"msg": "The conditional check ''Timeout when waiting for search string OpenSSH' not in result.msg' failed. 
The error was: error while evaluating conditional ('Timeout when waiting for search string OpenSSH' not in result.msg): Unable to look up a name or access an attribute in template string ({% if 'Timeout when waiting for search string OpenSSH' not in result.msg %} True {% else %} False {% endif %}).\nMake sure your variable name does not contain invalid characters like '-': argument of type 'AnsibleUndefined' is not iterable"} ``` `result` could potentially be `AnsibleUndefined`. This changes the logic to match that of the new pytest, which is a more appropriate method of checking that we are no longer able to SSH to the device, and no longer relies on parsing an error message (see https://github.com/Azure/sonic-mgmt/blob/master/tests/cacl/test_control_plane_acl.py#L34). Also remove unnecessary sleep, which also aligns more with the pytest version. --- ansible/roles/test/tasks/service_acl.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ansible/roles/test/tasks/service_acl.yml b/ansible/roles/test/tasks/service_acl.yml index 28ef106c199..957b25e4e4e 100644 --- a/ansible/roles/test/tasks/service_acl.yml +++ b/ansible/roles/test/tasks/service_acl.yml @@ -21,20 +21,14 @@ become: true shell: "nohup /tmp/config_service_acls.sh < /dev/null > /dev/null 2>&1 &" -- name: Sleep a bit to allow config_service_acls.sh to apply the new service ACLs - pause: - seconds: 5 - - name: Ensure the SSH port on the DuT becomes closed to us local_action: wait_for args: host: "{{ ansible_host }}" port: 22 - state: started + state: stopped search_regex: "OpenSSH" timeout: 10 - register: result - failed_when: "'Timeout when waiting for search string OpenSSH' not in result.msg" # Gather facts with SNMP version 2 - name: Ensure attempt to gather basic SNMP facts about the device now times out From 04463777731b396ac7ae915c055a3d9bf62e6cba Mon Sep 17 00:00:00 2001 From: crzas Date: Wed, 8 Jul 2020 12:29:36 +0800 Subject: [PATCH 207/218] Wrong DIP on packet (#1171) * 
wrong DIP on packet it cannot receive BGP, SNMP, SSH IP2ME packet on t1-lag and the root cause is copp_test.py config wrong DIP on packet * fix comment indentation Co-authored-by: Ying Xie --- ansible/roles/test/files/ptftests/copp_tests.py | 17 +++++++++++++---- ansible/roles/test/tasks/copp.yml | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ansible/roles/test/files/ptftests/copp_tests.py b/ansible/roles/test/files/ptftests/copp_tests.py index 20933a7cd02..cdbc6f4042b 100644 --- a/ansible/roles/test/files/ptftests/copp_tests.py +++ b/ansible/roles/test/files/ptftests/copp_tests.py @@ -53,12 +53,21 @@ def __init__(self): self.timeout_thr = None + self.minig_bgp = test_params.get('minig_bgp', None) + idx = 0 self.myip = {} self.peerip = {} - for i in xrange(self.MAX_PORTS): - self.myip[i] = "10.0.0.%d" % (i*2+1) - self.peerip[i] = "10.0.0.%d" % (i*2) - + + for peer in self.minig_bgp: + if str(peer['peer_addr']).find('10.0.0') == 0:#filter IPv6 info. + self.myip[idx] = peer['addr'] + self.peerip[idx] = peer['peer_addr'] + idx = idx+1 + #if port number is out of the total of IPv4, take the last IPv4 + if int(target_port_str) > idx-1: + self.myip[self.target_port] = self.myip[idx-1] + self.peerip[self.target_port] = self.peerip[idx-1] + return def log(self, message, debug=False): diff --git a/ansible/roles/test/tasks/copp.yml b/ansible/roles/test/tasks/copp.yml index 890a4c00f19..12fb97d2f13 100644 --- a/ansible/roles/test/tasks/copp.yml +++ b/ansible/roles/test/tasks/copp.yml @@ -67,6 +67,7 @@ - verbose=False - pkt_tx_count={{ pkt_tx_count|default(0) }} - target_port={{ nn_target_port }} + - minig_bgp={{ minigraph_bgp }} ptf_extra_options: "--device-socket 0-{{ nn_target_port }}@tcp://127.0.0.1:10900 --device-socket 1-{{ nn_target_port }}@tcp://{{ ansible_eth0['ipv4']['address'] }}:10900" with_items: - ARPTest From fbbc6c5ab2aa06791bfe59e728cb500e7561048c Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 13 Jul 2020 23:07:54 +0800 Subject: 
[PATCH 208/218] Add support for starting stopping VMs of specified test setup (#1878) A test server may have VMs for multiple test setups. The existing tool sets can start the first N VMs and remove all VMs on the server. This change added the support of starting and stopping partial of the VMs used by specified test setup. Signed-off-by: Xin Wang --- ansible/group_vars/sonic/{vars => variables} | 0 ansible/roles/vm_set/tasks/main.yml | 2 +- ansible/roles/vm_set/tasks/start.yml | 8 +++++ ansible/roles/vm_set/tasks/stop.yml | 8 +++++ ansible/testbed-cli.sh | 37 ++++++++++++++++++-- 5 files changed, 52 insertions(+), 3 deletions(-) rename ansible/group_vars/sonic/{vars => variables} (100%) diff --git a/ansible/group_vars/sonic/vars b/ansible/group_vars/sonic/variables similarity index 100% rename from ansible/group_vars/sonic/vars rename to ansible/group_vars/sonic/variables diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 68c649d5002..5fab767b05d 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ b/ansible/roles/vm_set/tasks/main.yml @@ -160,7 +160,7 @@ set_fact: current_server={{ group_names | extract_by_prefix('server_') }} - name: Extract VM names from the inventory - set_fact: VM_hosts={{ groups[current_server] | filter_by_prefix('VM') }} + set_fact: VM_hosts={{ groups[current_server] | filter_by_prefix('VM') | sort}} - name: Stop VMs include: stop.yml diff --git a/ansible/roles/vm_set/tasks/start.yml b/ansible/roles/vm_set/tasks/start.yml index 120016111f8..6c458eb8c2a 100644 --- a/ansible/roles/vm_set/tasks/start.yml +++ b/ansible/roles/vm_set/tasks/start.yml @@ -1,3 +1,11 @@ +- name: Load topo variables + include_vars: "vars/topo_{{ topo }}.yml" + when: topo is defined + +- name: Filter VMs for specified topology + set_fact: VM_hosts={{ VM_hosts | filter_vm_targets(topology['VMs'], VM_base) | sort }} + when: topology['VMs'] is defined and VM_base is defined + - name: Create directory for vm images and vm disks 
file: path={{ item }} state=directory mode=0755 with_items: diff --git a/ansible/roles/vm_set/tasks/stop.yml b/ansible/roles/vm_set/tasks/stop.yml index bfb63d168ac..3bdd402247a 100644 --- a/ansible/roles/vm_set/tasks/stop.yml +++ b/ansible/roles/vm_set/tasks/stop.yml @@ -1,3 +1,11 @@ +- name: Load topo variables + include_vars: "vars/topo_{{ topo }}.yml" + when: topo is defined + +- name: Filter VMs for specified topology + set_fact: VM_hosts={{ VM_hosts | filter_vm_targets(topology['VMs'], VM_base) | sort }} + when: topology['VMs'] is defined and VM_base is defined + - name: Remove VMs. include: stop_vm.yml vars: diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index 5d416b2ad63..29c95ac7507 100755 --- a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -7,6 +7,7 @@ function usage echo "testbed-cli. Interface to testbeds" echo "Usage:" echo " $0 [options] (start-vms | stop-vms) " + echo " $0 [options] (start-topo-vms | stop-topo-vms) " echo " $0 [options] (add-topo | remove-topo | renumber-topo | connect-topo) " echo " $0 [options] refresh-dut " echo " $0 [options] (connect-vms | disconnect-vms) " @@ -23,7 +24,7 @@ function usage echo " : Name of the target topology" echo " : Name of the Ansible inventory containing the DUT" echo - echo "To start VMs on a server: $0 start-vms 'server-name' ~/.password" + echo "To start all VMs on a server: $0 start-vms 'server-name' ~/.password" echo "To restart a subset of VMs:" echo " $0 start-vms server-name vault-password-file -e respin_vms=[vm_list]" echo " vm_list is separated by comma and shouldn't have space in the list." 
@@ -32,7 +33,9 @@ function usage echo " $0 start-vms server-name vault-password-file -e batch_size=2 -e interval=60" echo "To enable autostart of VMs:" echo " $0 start-vms server-name vault-password-file -e autostart=yes" - echo "To stop VMs on a server: $0 stop-vms 'server-name' ~/.password" + echo "To start VMs for specified topology on server: $0 start-topo-vms 'topo-name' ~/.password" + echo "To stop all VMs on a server: $0 stop-vms 'server-name' ~/.password" + echo "To stop VMs for specified topology on server: $0 stop-topo-vms 'topo-name' ~/.password" echo "To deploy a topology on a server: $0 add-topo 'topo-name' ~/.password" echo " Optional argument for add-topo:" echo " -e ptf_imagetag= # Use PTF image with specified tag for creating PTF container" @@ -109,6 +112,32 @@ function stop_vms ANSIBLE_SCP_IF_SSH=y ansible-playbook -i $vmfile testbed_stop_VMs.yml --vault-password-file="${passwd}" -l "${server}" $@ } +function start_topo_vms +{ + topology=$1 + passwd=$2 + shift + shift + read_file ${topology} + + echo "Starting VMs for topology '${topology}' on server '${server}'" + + ANSIBLE_SCP_IF_SSH=y ansible-playbook -i $vmfile testbed_start_VMs.yml --vault-password-file="${passwd}" -l "${server}" -e VM_base="$vm_base" -e topo="$topo" $@ +} + +function stop_topo_vms +{ + topology=$1 + passwd=$2 + shift + shift + read_file ${topology} + + echo "Stopping VMs for topology '${topology}' on server '${server}'" + + ANSIBLE_SCP_IF_SSH=y ansible-playbook -i $vmfile testbed_stop_VMs.yml --vault-password-file="${passwd}" -l "${server}" -e VM_base="$vm_base" -e topo="$topo" $@ +} + function add_topo { topology=$1 @@ -303,6 +332,10 @@ case "${subcmd}" in ;; stop-vms) stop_vms $@ ;; + start-topo-vms) start_topo_vms $@ + ;; + stop-topo-vms) stop_topo_vms $@ + ;; add-topo) add_topo $@ ;; remove-topo) remove_topo $@ From 79c562d9ab2454a0064944d420ae4788d04acbb7 Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Mon, 13 Jul 2020 20:33:09 
-0700 Subject: [PATCH 209/218] Fix for COPP Failure on T1-Lag Topology. (#1893) Verified with Dell 6000 Platforms. --- ansible/roles/test/templates/ptf_nn_agent.conf.dut.j2 | 2 +- ansible/swap_syncd.yml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/templates/ptf_nn_agent.conf.dut.j2 b/ansible/roles/test/templates/ptf_nn_agent.conf.dut.j2 index 7e327fde45d..ba0245578b2 100644 --- a/ansible/roles/test/templates/ptf_nn_agent.conf.dut.j2 +++ b/ansible/roles/test/templates/ptf_nn_agent.conf.dut.j2 @@ -1,5 +1,5 @@ [program:ptf_nn_agent] -command=/usr/bin/python /opt/ptf_nn_agent.py --device-socket 1@tcp://0.0.0.0:10900 -i 1-{{ nn_target_port }}@{{ nn_target_interface }} --set-nn-rcv-buffer=109430400 --set-iface-rcv-buffer=109430400 --set-nn-snd-buffer=109430400 --set-iface-snd-buffer=109430400 +command=/usr/bin/python /opt/ptf_nn_agent.py --device-socket 1@tcp://0.0.0.0:10900 -i 1-{{ nn_target_port }}@{{ nn_target_interface }} --set-nn-rcv-buffer=609430400 --set-iface-rcv-buffer=609430400 --set-nn-snd-buffer=609430400 --set-iface-snd-buffer=609430400 process_name=ptf_nn_agent stdout_logfile=/tmp/ptf_nn_agent.out.log stderr_logfile=/tmp/ptf_nn_agent.err.log diff --git a/ansible/swap_syncd.yml b/ansible/swap_syncd.yml index bfe55a8af1b..8cec6f5e41d 100644 --- a/ansible/swap_syncd.yml +++ b/ansible/swap_syncd.yml @@ -50,6 +50,13 @@ value: 509430500 sysctl_set: yes + - name: Set sysctl SENDBUF parameter for tests + become: true + sysctl: + name: "net.core.wmem_max" + value: 509430500 + sysctl_set: yes + - name: Gather SONiC base image version shell: sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version register: result From b271f7ebf749e2d9eb4cd72adb62cb4bc951e60d Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Wed, 15 Jul 2020 16:07:18 -0700 Subject: [PATCH 210/218] [201811][bgp_speaker] Terminate all processes with 'exabgp' in the command line (#1908) Add `-f` flag to pkill so that it will send the signal to processes 
where "exabgp" appears anywhere in the command line. Without this flag, it only sends the signal to processes where "exabgp" is the actual file being executed, thus leaving two `sh exabgp/start.sh` processes running. This change ensures all "exabgp" processes as well as the `sh exabgp/start.sh` processes are stopped. Also update comment to be more precise about what signal is being sent, in case we need to be more forceful in the future, we could send SIGKILL instead. --- ansible/roles/test/tasks/bgp_speaker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/tasks/bgp_speaker.yml b/ansible/roles/test/tasks/bgp_speaker.yml index 248aea38846..720f5292db7 100644 --- a/ansible/roles/test/tasks/bgp_speaker.yml +++ b/ansible/roles/test/tasks/bgp_speaker.yml @@ -201,8 +201,8 @@ - testbed_mtu={{ mtu|default(9114) }} ptf_extra_options: "--relax --debug info --log-file /tmp/bgp_speaker_test.FibTest.log --socket-recv-size 16384" always: - - name: Kill exabgp instances - shell: pkill exabgp + - name: Send SIGTERM to exabgp instances + shell: pkill -f exabgp delegate_to: "{{ptf_host}}" - name: Flush vlan ips route From 5be5e3c55fa42c50a23a57dd4cb058021010e10b Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Thu, 16 Jul 2020 11:58:29 -0700 Subject: [PATCH 211/218] [201811][bgp_speaker] Ignore errors upon terminating exabgp processes (#1915) --- ansible/roles/test/tasks/bgp_speaker.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/bgp_speaker.yml b/ansible/roles/test/tasks/bgp_speaker.yml index 720f5292db7..28b5ea7180b 100644 --- a/ansible/roles/test/tasks/bgp_speaker.yml +++ b/ansible/roles/test/tasks/bgp_speaker.yml @@ -129,7 +129,7 @@ delegate_to: "{{ptf_host}}" - name: Kill exabgp instances if existing - shell: pkill exabgp + shell: pkill -f exabgp delegate_to: "{{ptf_host}}" ignore_errors: yes @@ -204,6 +204,7 @@ - name: Send SIGTERM to exabgp instances shell: pkill -f exabgp 
delegate_to: "{{ptf_host}}" + ignore_errors: yes - name: Flush vlan ips route command: ip route flush {{item.split('/')[0]}}/32 From a4b079402b8ea5d4a6384e9ea5430bafea75f8b2 Mon Sep 17 00:00:00 2001 From: Danny Allen Date: Fri, 17 Jul 2020 07:05:14 -0700 Subject: [PATCH 212/218] [ansible] Add revert_syncd task to automate cleaning up after swap_syncd (#1917) Signed-off-by: Danny Allen --- ansible/revert_syncd.yml | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 ansible/revert_syncd.yml diff --git a/ansible/revert_syncd.yml b/ansible/revert_syncd.yml new file mode 100644 index 00000000000..2671c85c24b --- /dev/null +++ b/ansible/revert_syncd.yml @@ -0,0 +1,68 @@ +# Revert docker-syncd back to docker-syncd for oneimage deployed switch +# A temporary solution during period that sonic_docker is not ready for oneimage +# Example usage: +# ansible-playbook revert_syncd.yml -i str --vault-password-file ~/password.txt --limit devicename + +- hosts: all + gather_facts: no + vars_files: + - vars/docker_registry.yml + tasks: + + - name: Gathering minigraph facts about the device + minigraph_facts: host={{ inventory_hostname }} + tags: always + + - name: Set sonic_hwsku fact + set_fact: + sonic_hwsku: "{{minigraph_hwsku}}" + tags: always + + - name: Set sonic_asic_type fact + set_fact: + sonic_asic_type: broadcom + docker_rpc_image_name: docker-syncd-brcm-rpc + docker_syncd_name: docker-syncd-brcm + when: sonic_hwsku in broadcom_hwskus + tags: always + + - name: Set sonic_asic_type fact + set_fact: + sonic_asic_type: mellanox + docker_rpc_image_name: docker-syncd-mlnx-rpc + docker_syncd_name: docker-syncd-mlnx + when: sonic_hwsku in mellanox_hwskus + tags: always + + - name: Stop swss service + become: true + command: systemctl stop swss + + - name: Delete syncd docker + become: true + shell: docker rm syncd + ignore_errors: yes + + - name: Gather SONiC base image version + shell: sonic-cfggen -y /etc/sonic/sonic_version.yml -v 
build_version + register: result + changed_when: false + + - name: Set base image verison variable + set_fact: + sonic_image_version: "{{ result.stdout }}" + + - name: Tag default image as syncd + shell: docker tag {{docker_syncd_name}}:{{sonic_image_version}} {{docker_syncd_name}} + + - name: Delete the rpc image + become: true + shell: docker rmi {{docker_registry_host}}/{{docker_rpc_image_name}}:{{sonic_image_version}} + ignore_errors: yes + + - name: Start swss service + become: true + command: systemctl start swss + + - name: Wait for the initialization process + pause: seconds=60 From 8663646fbb9a3d382a820de190331cad2a1d106d Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Mon, 3 Aug 2020 23:03:32 -0700 Subject: [PATCH 213/218] Two changes are being done here:- (#2021) a) Instead of teamdctl to add/remove port-channel member we shoudl config port-channel command. Reason being show interface status does not get updated as teadctl bypass config db b) One of change done by me as prt of PR https://github.com/Azure/sonic-mgmt/pull/1893 was not complete as I forgot to update Buffer size in swap_syncd.yml. Fixed now. 
Signed-off-by: Abhishek Dosi --- ansible/roles/test/tasks/arpall.yml | 4 ++-- ansible/swap_syncd.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/test/tasks/arpall.yml b/ansible/roles/test/tasks/arpall.yml index 14e6cad97fb..b1b7c283a89 100644 --- a/ansible/roles/test/tasks/arpall.yml +++ b/ansible/roles/test/tasks/arpall.yml @@ -30,7 +30,7 @@ with_dict: minigraph_portchannels - name: move interface {{ intf1 }} out of {{ po1 }} - shell: teamdctl {{ po1 }} port remove {{ intf1 }} + shell: config portchannel member del {{ po1 }} {{ intf1 }} become: yes when: po1 is defined @@ -46,7 +46,7 @@ with_dict: minigraph_portchannels - name: move {{ intf2 }} out of {{ po2 }} - shell: teamdctl {{ po2 }} port remove {{ intf2 }} + shell: config portchannel member del {{ po2 }} {{ intf2 }} become: yes when: po2 is defined diff --git a/ansible/swap_syncd.yml b/ansible/swap_syncd.yml index 8cec6f5e41d..00e713458c0 100644 --- a/ansible/swap_syncd.yml +++ b/ansible/swap_syncd.yml @@ -47,14 +47,14 @@ become: true sysctl: name: "net.core.rmem_max" - value: 509430500 + value: 609430500 sysctl_set: yes - name: Set sysctl SENDBUF parameter for tests become: true sysctl: name: "net.core.wmem_max" - value: 509430500 + value: 609430500 sysctl_set: yes - name: Gather SONiC base image version From 9dbcc95389a22440b314a11a6792fa3991a6fcf5 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 4 Aug 2020 17:01:03 -0700 Subject: [PATCH 214/218] [show interface] make show interface compatible w or w/o fec information (#2024) Sample old output: Interface Lanes Speed MTU Alias Vlan Oper Admin Type Asym PFC --------------- --------------- ------- ----- ------------ --------------- ------ ------- --------------- ---------- Ethernet0 77,78 50G 9100 Ethernet1/1 trunk down up QSFP28 or later off Sample new output: Interface Lanes Speed MTU FEC Alias Vlan Oper Admin Type Asym PFC --------------- --------------- ------- ----- ----- ------------ --------------- ------ 
------- --------------- ---------- Ethernet48 57,58,59,60 100G 9100 rs Ethernet13/1 PortChannel0001 up up QSFP28 or later off Signed-off-by: Ying Xie --- ansible/library/show_interface.py | 53 +++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/ansible/library/show_interface.py b/ansible/library/show_interface.py index e56b9852a6d..01366f5556b 100644 --- a/ansible/library/show_interface.py +++ b/ansible/library/show_interface.py @@ -83,6 +83,7 @@ def run(self): self.module.exit_json(ansible_facts=self.facts) def collect_interface_status(self): + regex_int_fec = re.compile(r'(\S+)\s+[\d,N\/A]+\s+(\w+)\s+(\d+)\s+(rs|N\/A)\s+([\w\/]+)\s+(\w+)\s+(\w+)\s+(\w+)') regex_int = re.compile(r'(\S+)\s+[\d,N\/A]+\s+(\w+)\s+(\d+)\s+([\w\/]+)\s+(\w+)\s+(\w+)\s+(\w+)') self.int_status = {} if self.m_args['interfaces'] is not None: @@ -93,13 +94,24 @@ def collect_interface_status(self): rc, self.out, err = self.module.run_command(command, executable='/bin/bash', use_unsafe_shell=True) for line in self.out.split("\n"): line = line.strip() - if regex_int.match(line): - self.int_status[interface]['name'] = regex_int.match(line).group(1) - self.int_status[interface]['speed'] = regex_int.match(line).group(2) - self.int_status[interface]['alias'] = regex_int.match(line).group(4) - self.int_status[interface]['vlan'] = regex_int.match(line).group(5) - self.int_status[interface]['oper_state'] = regex_int.match(line).group(6) - self.int_status[interface]['admin_state'] = regex_int.match(line).group(7) + fec = regex_int_fec.match(line) + old = regex_int.match(line) + if fec and interface == fec.group(1): + self.int_status[interface]['name'] = fec.group(1) + self.int_status[interface]['speed'] = fec.group(2) + self.int_status[interface]['fec'] = fec.group(4) + self.int_status[interface]['alias'] = fec.group(5) + self.int_status[interface]['vlan'] = fec.group(6) + self.int_status[interface]['oper_state'] = fec.group(7) + 
self.int_status[interface]['admin_state'] = fec.group(8) + elif old and interface == old.group(1): + self.int_status[interface]['name'] = old.group(1) + self.int_status[interface]['speed'] = old.group(2) + self.int_status[interface]['fec'] = 'Unknown' + self.int_status[interface]['alias'] = old.group(4) + self.int_status[interface]['vlan'] = old.group(5) + self.int_status[interface]['oper_state'] = old.group(6) + self.int_status[interface]['admin_state'] = old.group(7) self.facts['int_status'] = self.int_status except Exception as e: self.module.fail_json(msg=str(e)) @@ -110,15 +122,28 @@ def collect_interface_status(self): rc, self.out, err = self.module.run_command('show interface status', executable='/bin/bash', use_unsafe_shell=True) for line in self.out.split("\n"): line = line.strip() - if regex_int.match(line): - interface = regex_int.match(line).group(1) + fec = regex_int_fec.match(line) + old = regex_int.match(line) + if fec: + interface = fec.group(1) self.int_status[interface] = {} self.int_status[interface]['name'] = interface - self.int_status[interface]['speed'] = regex_int.match(line).group(2) - self.int_status[interface]['alias'] = regex_int.match(line).group(4) - self.int_status[interface]['vlan'] = regex_int.match(line).group(5) - self.int_status[interface]['oper_state'] = regex_int.match(line).group(6) - self.int_status[interface]['admin_state'] = regex_int.match(line).group(7) + self.int_status[interface]['speed'] = fec.group(2) + self.int_status[interface]['fec'] = fec.group(4) + self.int_status[interface]['alias'] = fec.group(5) + self.int_status[interface]['vlan'] = fec.group(6) + self.int_status[interface]['oper_state'] = fec.group(7) + self.int_status[interface]['admin_state'] = fec.group(8) + elif old: + interface = old.group(1) + self.int_status[interface] = {} + self.int_status[interface]['name'] = interface + self.int_status[interface]['speed'] = old.group(2) + self.int_status[interface]['fec'] = 'Unknown' + 
self.int_status[interface]['alias'] = old.group(4) + self.int_status[interface]['vlan'] = old.group(5) + self.int_status[interface]['oper_state'] = old.group(6) + self.int_status[interface]['admin_state'] = old.group(7) self.facts['int_status'] = self.int_status except Exception as e: self.module.fail_json(msg=str(e)) From 8e76bfb761618d5b725a4a50b460b439524bcda0 Mon Sep 17 00:00:00 2001 From: abdosi <58047199+abdosi@users.noreply.github.com> Date: Mon, 31 Aug 2020 09:00:47 -0700 Subject: [PATCH 215/218] Ported master PR https://github.com/Azure/sonic-mgmt/pull/1825 (#2157) for internal-201811 so that we can run 209111 image using anisble-playbook. Signed-off-by: Abhishek Dosi --- ansible/config_sonic_basedon_testbed.yml | 67 ++++++++++++++++++++++ ansible/group_vars/all/telemetry_certs.yml | 10 ++++ 2 files changed, 77 insertions(+) create mode 100644 ansible/group_vars/all/telemetry_certs.yml diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index e075416395b..65e6d468e96 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -102,6 +102,73 @@ dest=minigraph/{{ inventory_hostname}}.{{ topo }}.xml connection: local when: local_minigraph is defined and local_minigraph|bool == true + + - block: + - name: Init telemetry keys + set_fact: + server_key: "" + server_cer: "" + dsmsroot_key: "" + dsmsroot_cer: "" + dir_path: "" + + - name: read server key + set_fact: + server_key: "{{ telemetry_certs['server_key'] }}" + when: telemetry_certs['server_key'] is defined + + - name: read server cer + set_fact: + server_cer: "{{ telemetry_certs['server_cer'] }}" + when: telemetry_certs['server_cer'] is defined + + - name: read dsmsroot key + set_fact: + dsmsroot_key: "{{ telemetry_certs['dsmsroot_key'] }}" + when: telemetry_certs['dsmsroot_key'] is defined + + - name: read dsmsroot cer + set_fact: + dsmsroot_cer: "{{ telemetry_certs['dsmsroot_cer'] }}" + when: 
telemetry_certs['dsmsroot_cer'] is defined + + - name: read directory path + set_fact: + dir_path: "{{ telemetry_certs['dir_path'] }}" + when: telemetry_certs['dir_path'] is defined + + - name: Create telemetry directory + file: + path: "{{ dir_path }}" + state: directory + mode: '0755' + become: true + + # {{ server_cer }}/ streamingtelemetryserver.cer need to be copied on PTFDocker and renamed as dsmsroot.cer + - name: Generate server cert using openssl. + command: openssl req \ + -x509 \ + -sha256 \ + -nodes \ + -newkey rsa:2048 \ + -keyout "{{ server_key }}" + -subj "/CN=ndastreamingservertest" + -out "{{ server_cer }}" + become: true + + # {{ dsmsroot_cer }}/ dsmsroot.cer need to be copied on PTFDocker and renamed as streamingtelemetryclient.cer + # {{ dsms_key }}/ dsmsroot.key need to be copied and renamed as streamingtelemetryclient.key + - name: Generate dsmsroot cert using openssl. + command: openssl req \ + -x509 \ + -sha256 \ + -nodes \ + -newkey rsa:2048 \ + -keyout "{{ dsmsroot_key }}" + -subj "/CN=ndastreamingclienttest" + -out "{{ dsmsroot_cer }}" + become: true + - block: - name: saved original minigraph file in SONiC DUT(ignore errors when file doesnot exist) diff --git a/ansible/group_vars/all/telemetry_certs.yml b/ansible/group_vars/all/telemetry_certs.yml new file mode 100644 index 00000000000..867242b43d6 --- /dev/null +++ b/ansible/group_vars/all/telemetry_certs.yml @@ -0,0 +1,10 @@ +# Configure telemetry server and dsmsroot key,cer + +telemetry_certs: + server_key: "/etc/sonic/telemetry/streamingtelemetryserver.key" + server_csr: "/etc/sonic/telemetry/streamingtelemetryserver.csr" + server_cer: "/etc/sonic/telemetry/streamingtelemetryserver.cer" + dsmsroot_key: "/etc/sonic/telemetry/dsmsroot.key" + dsmsroot_csr: "/etc/sonic/telemetry/dsmsroot.csr" + dsmsroot_cer: "/etc/sonic/telemetry/dsmsroot.cer" + dir_path: "/etc/sonic/telemetry" From 1511ae6ff30190a74d8eef2e8d763a6a04f8c34e Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Wed, 2 Jun 
2021 12:26:47 -0500 Subject: [PATCH 216/218] [201811] Port changes for 7050QX-32S-S4Q31 into 201811 branch (#3570) * minigraph: Add the ability to set a per-port speed in the minigraph (#3527) * minigraph: Add the ability to set a per-port speed in the minigraph (cherry picked from commit cef1f77fd6afbee587756e0349be24f47fe7e3f9) * minigraph: Fix with_dict syntax in the playbook That entry needs to be specified as referring to a variable. Signed-off-by: Saikrishna Arcot * [topo] Add test topology for 7050QX-32S-S4Q31 (#3568) (cherry picked from commit 6d1720b4414efe3d4aa46cf13bb77178ed5afb15) --- ansible/config_sonic_basedon_testbed.yml | 11 +- ansible/templates/minigraph_device.j2 | 9 +- ansible/vars/topo_t0-35.yml | 187 +++++++++++++++++++++++ 3 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 ansible/vars/topo_t0-35.yml diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index 65e6d468e96..ad78c8f860c 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -69,6 +69,11 @@ connection: local when: "VM_topo | bool" + - name: get connection graph if defined for dut (ignore any errors) + conn_graph_facts: host="{{ inventory_hostname }}" + connection: local + ignore_errors: true + - name: find interface name mapping and individual interface speed if defined port_alias: hwsku="{{ hwsku }}" @@ -85,17 +90,17 @@ - name: find all interface indexes mapping connecting to VM set_fact: interface_to_vms: "{{ interface_to_vms|default({}) | combine({ item.key: item.value['interface_indexes'] }) }}" - with_dict: vm_topo_config['vm'] + with_dict: "{{ vm_topo_config['vm'] }}" - name: find all interface indexes connecting to VM set_fact: ifindex_to_vms: "{{ ifindex_to_vms|default([]) }} + {{ item.value['interface_indexes']}}" - with_dict: vm_topo_config['vm'] + with_dict: "{{ vm_topo_config['vm'] }}" - name: find all interface names set_fact: intf_names: "{{ intf_names | 
default({}) | combine({item.key: port_alias[item.value[0]|int:item.value[-1]|int+1] }) }}" - with_dict: interface_to_vms + with_dict: "{{ interface_to_vms }}" - name: create minigraph file in ansible minigraph folder template: src=templates/minigraph_template.j2 diff --git a/ansible/templates/minigraph_device.j2 b/ansible/templates/minigraph_device.j2 index cfaad980078..d1f6ce96e54 100644 --- a/ansible/templates/minigraph_device.j2 +++ b/ansible/templates/minigraph_device.j2 @@ -15,11 +15,12 @@ false 0 0 -{% set speed_option = port_speed | length %} -{% if speed_option == 0 %} - {{ iface_speed }} -{% else %} +{% if port_speed[port_alias[index]] is defined %} {{ port_speed[port_alias[index]] }} +{% elif device_conn[inventory_hostname][port_alias[index]] is defined %} + {{ device_conn[inventory_hostname][port_alias[index]]['speed'] }} +{% else %} + {{ iface_speed }} {% endif %} {% endfor %} diff --git a/ansible/vars/topo_t0-35.yml b/ansible/vars/topo_t0-35.yml new file mode 100644 index 00000000000..7500c856699 --- /dev/null +++ b/ansible/vars/topo_t0-35.yml @@ -0,0 +1,187 @@ +topology: + host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 + - 22 + - 23 + - 24 + - 25 + - 26 + - 27 + - 28 + - 29 + - 30 + disabled_host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 28 + - 29 + - 30 + VMs: + ARISTA01T1: + vlans: + - 31 + vm_offset: 0 + ARISTA02T1: + vlans: + - 32 + vm_offset: 1 + ARISTA03T1: + vlans: + - 33 + vm_offset: 2 + ARISTA04T1: + vlans: + - 34 + vm_offset: 3 + DUT: + vlan_configs: + default_vlan_config: one_vlan_a + one_vlan_a: + Vlan1000: + id: 1000 + intfs: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + prefix: 192.168.0.1/21 + prefix_v6: fc02:1000::1/64 + tag: 1000 + two_vlan_a: + Vlan100: + id: 100 + intfs: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + prefix: 192.168.100.1/21 + prefix_v6: fc02:100::1/64 + tag: 
100 + Vlan200: + id: 200 + intfs: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + prefix: 192.168.200.1/21 + prefix_v6: fc02:200::1/64 + tag: 200 + +configuration_properties: + common: + dut_asn: 65100 + dut_type: ToRRouter + swrole: leaf + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + max_tor_subnet_number: 16 + tor_subnet_size: 128 + spine_asn: 65534 + leaf_asn_start: 64600 + tor_asn_start: 65500 + failure_rate: 0 + +configuration: + ARISTA01T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.29/32 + ipv6: 2064:100::1d/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.29/24 + ipv6: fc0a::1d/64 + + ARISTA02T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.30/32 + ipv6: 2064:100::1e/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 10.10.246.30/24 + ipv6: fc0a::1e/64 + + ARISTA03T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.31/32 + ipv6: 2064:100::1f/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 10.10.246.31/24 + ipv6: fc0a::1f/64 + + ARISTA04T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.32/32 + ipv6: 2064:100::20/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.32/24 + ipv6: fc0a::20/64 From 7212f4ba4cb96f241119756b5e945ab57ed9f51d Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Tue, 8 Jun 2021 00:57:00 -0500 Subject: [PATCH 217/218] [201811] minigraph: Fix issue of alias name vs sonic name 
being used for populating the speed (#3584) For filling in the speed of the port, when reading `device_conn`, the Sonic name needs to be used for reading into the dict, not the alias name. Also, indexing by the hostname isn't required for 201811, since the returned structure gets rid of that "layer" if the hostname exists in that data structure. Signed-off-by: Saikrishna Arcot --- ansible/templates/minigraph_device.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/templates/minigraph_device.j2 b/ansible/templates/minigraph_device.j2 index d1f6ce96e54..7a970fcf4e5 100644 --- a/ansible/templates/minigraph_device.j2 +++ b/ansible/templates/minigraph_device.j2 @@ -17,8 +17,8 @@ 0 {% if port_speed[port_alias[index]] is defined %} {{ port_speed[port_alias[index]] }} -{% elif device_conn[inventory_hostname][port_alias[index]] is defined %} - {{ device_conn[inventory_hostname][port_alias[index]]['speed'] }} +{% elif device_conn[port_alias_map[port_alias[index]]] is defined %} + {{ device_conn[port_alias_map[port_alias[index]]]['speed'] }} {% else %} {{ iface_speed }} {% endif %} From a73cad26410b6a81078a6820a8765ebebb441fda Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 17 Jun 2021 11:41:21 -0700 Subject: [PATCH 218/218] [201811] Revert ansible syntax changes and clean up t0-35 topo definition (#3680) * [topo] Fix up t0-35 to match the definition for other topos in the 201811 branch Signed-off-by: Saikrishna Arcot * Partial revert of 1511ae6ff30190a74d8eef2e8d763a6a04f8c34e Ansible syntax changes are not needed. 
Signed-off-by: Saikrishna Arcot --- ansible/config_sonic_basedon_testbed.yml | 6 ++-- ansible/vars/topo_t0-35.yml | 37 +++++------------------- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index ad78c8f860c..0d24b78d61d 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -90,17 +90,17 @@ - name: find all interface indexes mapping connecting to VM set_fact: interface_to_vms: "{{ interface_to_vms|default({}) | combine({ item.key: item.value['interface_indexes'] }) }}" - with_dict: "{{ vm_topo_config['vm'] }}" + with_dict: vm_topo_config['vm'] - name: find all interface indexes connecting to VM set_fact: ifindex_to_vms: "{{ ifindex_to_vms|default([]) }} + {{ item.value['interface_indexes']}}" - with_dict: "{{ vm_topo_config['vm'] }}" + with_dict: vm_topo_config['vm'] - name: find all interface names set_fact: intf_names: "{{ intf_names | default({}) | combine({item.key: port_alias[item.value[0]|int:item.value[-1]|int+1] }) }}" - with_dict: "{{ interface_to_vms }}" + with_dict: interface_to_vms - name: create minigraph file in ansible minigraph folder template: src=templates/minigraph_template.j2 diff --git a/ansible/vars/topo_t0-35.yml b/ansible/vars/topo_t0-35.yml index 7500c856699..d8ecd2a2f8e 100644 --- a/ansible/vars/topo_t0-35.yml +++ b/ansible/vars/topo_t0-35.yml @@ -56,37 +56,12 @@ topology: vlans: - 34 vm_offset: 3 - DUT: - vlan_configs: - default_vlan_config: one_vlan_a - one_vlan_a: - Vlan1000: - id: 1000 - intfs: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] - prefix: 192.168.0.1/21 - prefix_v6: fc02:1000::1/64 - tag: 1000 - two_vlan_a: - Vlan100: - id: 100 - intfs: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - prefix: 192.168.100.1/21 - prefix_v6: fc02:100::1/64 - tag: 100 - Vlan200: - id: 200 - intfs: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] - prefix: 
192.168.200.1/21 - prefix_v6: fc02:200::1/64 - tag: 200 configuration_properties: common: dut_asn: 65100 dut_type: ToRRouter swrole: leaf - nhipv4: 10.10.246.254 - nhipv6: FC0A::FF podset_number: 200 tor_number: 16 tor_subnet_number: 2 @@ -94,8 +69,10 @@ configuration_properties: tor_subnet_size: 128 spine_asn: 65534 leaf_asn_start: 64600 - tor_asn_start: 65500 + tor_asn_start: 65100 failure_rate: 0 + nhipv4: 10.10.246.100 + nhipv6: FC0A::C9 configuration: ARISTA01T1: @@ -118,7 +95,7 @@ configuration: ipv6: fc00::72/126 bp_interface: ipv4: 10.10.246.29/24 - ipv6: fc0a::1d/64 + ipv6: fc0a::3a/64 ARISTA02T1: properties: @@ -140,7 +117,7 @@ configuration: ipv6: fc00::76/126 bp_interface: ipv4: 10.10.246.30/24 - ipv6: fc0a::1e/64 + ipv6: fc0a::3d/64 ARISTA03T1: properties: @@ -162,7 +139,7 @@ configuration: ipv6: fc00::7a/126 bp_interface: ipv4: 10.10.246.31/24 - ipv6: fc0a::1f/64 + ipv6: fc0a::3e/64 ARISTA04T1: properties: @@ -184,4 +161,4 @@ configuration: ipv6: fc00::7e/126 bp_interface: ipv4: 10.10.246.32/24 - ipv6: fc0a::20/64 + ipv6: fc0a::41/64