Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 275 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@

import pytest

import datetime
import itertools
import logging
import os
import sys
import tempfile

import git
from packaging import version

import lib.config as global_config
from lib import pxe
from lib import commands
from lib.common import (
callable_marker,
DiskDevName,
Expand Down Expand Up @@ -45,6 +48,14 @@
CACHE_IMPORTED_VM = False
assert CACHE_IMPORTED_VM in [True, False]

# Session-level tracking for failed tests and their associated hosts/VMs
FAILED_TESTS_HOSTS: dict = {}  # maps test nodeid (str) -> set of Host objects
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment looks like a good candidate for being turned into a type hint.

FAILED_TESTS_VMS: dict = {}  # maps test nodeid (str) -> set of VM objects
# Set to True once any tracked test has failed; read at session teardown
SESSION_HAS_FAILURES: bool = False
Copy link
Contributor

@ydirson ydirson Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need SESSION_HAS_FAILURES? It looks like it just reflects that FAILED_TESTS_HOSTS has contents or not?


# Path to timestamp file on remote hosts for log extraction
# NOTE(review): a fixed path may clash when several test sessions run on the
# same host simultaneously — consider a per-session temporary file.
HOST_TIMESTAMPS_FILE = "/tmp/pytest-timestamps.log"
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would likely want to use a per-session temporary file, or there might be issues when several sessions run simultaneously.


# pytest hooks

def pytest_addoption(parser):
Expand Down Expand Up @@ -99,11 +110,29 @@ def pytest_addoption(parser):
help="Format of VDI to execute tests on."
"Example: vhd,qcow2"
)
parser.addoption(
"--collect-logs-on-failure",
action="store_true",
default=True,
help="Automatically collect xen-bugtool logs from hosts when tests fail (default: True)"
)
parser.addoption(
"--no-collect-logs-on-failure",
action="store_false",
dest="collect_logs_on_failure",
help="Disable automatic log collection on test failure"
)
Comment on lines +113 to +124
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may not be necessary to have a flag that just makes the default behaviour explicit?


def pytest_configure(config):
    """Apply command-line options to the global config and declare custom markers."""
    banner_opt = config.getoption('--ignore-ssh-banner')
    max_lines_opt = config.getoption('--ssh-output-max-lines')
    global_config.ignore_ssh_banner = banner_opt
    global_config.ssh_output_max_lines = int(max_lines_opt)

    # Make the opt-in console-capture marker known to pytest
    marker_spec = "capture_console: Capture VM console screenshots on test failure (opt-in for critical tests)"
    config.addinivalue_line("markers", marker_spec)

def pytest_generate_tests(metafunc):
if "vm_ref" in metafunc.fixturenames:
vms = metafunc.config.getoption("vm")
Expand Down Expand Up @@ -760,3 +789,249 @@ def cifs_iso_sr(host, cifs_iso_device_config):
yield sr
# teardown
sr.forget()

# Session-scoped fixture providing a shared log directory path for all artifacts
@pytest.fixture(scope='session')
def session_log_dir():
    """
    Return the session-wide log directory path for storing all test artifacts.

    The directory itself is NOT created here: creation is postponed to the
    fixtures that actually write artifacts, so that nothing is created on
    disk when no test fails.

    The path includes BUILD_NUMBER when running in a Jenkins CI environment,
    and is a local timestamped path otherwise.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # BUILD_NUMBER and WORKSPACE are specific to Jenkins
    build_number = os.environ.get('BUILD_NUMBER')
    if build_number:
        logging.debug(f"Use Jenkins CI build number: {build_number}")
        # BUGFIX: fall back to the current directory when WORKSPACE is unset,
        # instead of producing a literal "None/report_..." path
        workspace = os.environ.get('WORKSPACE', '.')
        log_dir = f"{workspace}/report_{build_number}"
    else:
        log_dir = f"pytest-logs/report_{timestamp}"

    return log_dir

# Helper function for immediate console capture
def capture_vm_console(vm: VM, log_dir: str) -> str:
    """
    Capture a screenshot of the VM's console into log_dir.

    Invokes scripts/capture-console.py as a subprocess, using the same Python
    interpreter as the current process, passing the VM's host, UUID and the
    host credentials from data.py.

    Returns the path of the PNG written under log_dir.
    Raises FileNotFoundError if the capture script is missing, and whatever
    commands.local_cmd raises when the capture subprocess fails.
    """
    # Path to console capture Python script
    script_path = os.path.join(os.path.dirname(__file__), 'scripts', 'capture-console.py')
    if not os.path.exists(script_path):
        raise FileNotFoundError(f"Console capture script not found at {script_path}")

    host_ip = vm.host.hostname_or_ip
    vm_uuid = vm.uuid

    # Best effort to get a readable VM name; fall back to a short UUID
    try:
        vm_name = vm.name() if hasattr(vm, 'name') and callable(vm.name) else vm_uuid[:8]
    except Exception:
        vm_name = vm_uuid[:8]

    # Generate output filename with VM info
    output_filename = f"{host_ip.replace(':', '_')}_vm_{vm_name}_{vm_uuid[:8]}_console.png"
    output_path = os.path.join(log_dir, output_filename)

    logging.debug(f"Capturing console for VM {vm_name} ({vm_uuid}) in {output_path}...")

    # Get credentials from data.py (imported lazily: data.py is user-provided)
    from data import HOST_DEFAULT_PASSWORD, HOST_DEFAULT_USER

    # Build command to call the Python script directly (XOLite mode).
    # NOTE(review): passing the password on the command line exposes it to
    # other local processes — consider an environment variable instead.
    cmd = [
        sys.executable,  # use the same Python interpreter as the current process
        script_path,
        '--host', host_ip,
        '--vm-uuid', vm_uuid,
        output_filename,
        '--output-dir', log_dir,
        '--user', HOST_DEFAULT_USER,
        '--password', HOST_DEFAULT_PASSWORD
    ]

    # Run the capture script; local_cmd raises on failure (check=True),
    # so the (previously unused) return value is not kept
    commands.local_cmd(cmd, check=True)

    return output_path

# Record test timestamps so the log extraction script can locate the log
# sections relevant to a given test module
@pytest.fixture(scope='class', autouse=True)
def bugreport_timestamp(request, host):
    """
    Record test start and end timestamps on the host.

    Appends "begin <module> <epoch>" / "end <module> <epoch>" lines to
    HOST_TIMESTAMPS_FILE on the remote host, so collected logs can later be
    correlated with the time window of each test module.
    """
    # NOTE: despite the class scope, the recorded identifier is the module name
    module_name = request.module.__name__
    logging.debug(f"Record timestamp (begin): {module_name}")
    host.ssh(f"echo begin {module_name} $(date '+%s') >> {HOST_TIMESTAMPS_FILE}")
    yield
    logging.debug(f"Record timestamp (end): {module_name}")
    host.ssh(f"echo end {module_name} $(date '+%s') >> {HOST_TIMESTAMPS_FILE}")

@pytest.fixture(scope='session', autouse=True)
def check_bug_reports(host):
    """
    Warn when stale bug-report files are accumulating on the host.

    Leftover files under /var/opt/xen/bug-report/ can be the sign of
    interrupted tests and some cleanup might be required. We don't want
    to accumulate too many of these files.
    """
    output = host.ssh("find /var/opt/xen/bug-report/ -type f 2> /dev/null | wc -l")
    count = int(output)
    # arbitrary threshold; this should let a developer work on that host without warnings
    if count > 3:
        logging.warning(f"Cleanup needed. {count} bug report(s) have been found on host.")

def _introspect_test_fixtures(request):
    """
    Find the Host and VM objects a test used, by inspecting its fixtures.

    Iterates over every fixture the test requested and collects:
    - Host fixtures directly;
    - VM fixtures (plus their host);
    - lists of Hosts or VMs;
    - objects carrying a single `host` attribute (e.g. SR);
    - pool-like objects carrying a `hosts` list.

    Returns a (test_hosts, test_vms) tuple of sets.
    """
    test_hosts = set()
    test_vms = set()

    # XXX: introspecting fixtures is the best way we found to gather the VMs
    # and Hosts used by a test.  The alternatives require maintenance and
    # boilerplate, and are more prone to missing some cases.
    for fixturename in getattr(request, 'fixturenames', []):
        try:
            fixture_value = request.getfixturevalue(fixturename)
            if isinstance(fixture_value, Host):
                test_hosts.add(fixture_value)
            elif isinstance(fixture_value, VM):
                test_vms.add(fixture_value)
                test_hosts.add(fixture_value.host)
            elif isinstance(fixture_value, list):
                for item in fixture_value:
                    if isinstance(item, Host):
                        test_hosts.add(item)
                    elif isinstance(item, VM):
                        test_vms.add(item)
                        test_hosts.add(item.host)
            elif isinstance(fixture_value, dict):
                logging.warning("dictionary in fixtures case not handled")
            # Objects carrying a single host (e.g. SR, Pool master)
            elif hasattr(fixture_value, 'host') and isinstance(fixture_value.host, Host):
                test_hosts.add(fixture_value.host)
            # Pool-like objects carrying a list of hosts
            elif hasattr(fixture_value, 'hosts') and isinstance(fixture_value.hosts, list):
                test_hosts.update(h for h in fixture_value.hosts if isinstance(h, Host))
        except Exception as e:
            # Best effort: some fixtures may not be available yet or may fail
            # to load — log instead of silently swallowing
            logging.debug(f"Skipping fixture {fixturename!r} during introspection: {e}")

    return test_hosts, test_vms

@pytest.fixture(scope='function', autouse=True)
def track_test_host(request, session_log_dir):
    """
    Track which hosts and VMs are used by each test.

    On test failure, record the test and its hosts/VMs in the module-level
    FAILED_TESTS_HOSTS / FAILED_TESTS_VMS dicts for later log collection,
    and — for tests carrying the capture_console marker — immediately
    capture VM console screenshots while the VMs are still alive.
    """
    # Only SESSION_HAS_FAILURES is rebound; the dicts are mutated in place
    # and do not need a global declaration.
    global SESSION_HAS_FAILURES

    yield

    # After the test completes, check whether its call phase failed
    report = request.node.stash.get(PHASE_REPORT_KEY, None)
    if report and "call" in report and report["call"].failed:
        test_nodeid = request.node.nodeid

        # Only introspect fixtures if the test failed (performance optimization)
        test_hosts, test_vms = _introspect_test_fixtures(request)

        logging.debug(f"Test failed: {test_nodeid}, "
                      f"used hosts: {[h.hostname_or_ip for h in test_hosts]}, "
                      f"VMs: {[vm.uuid for vm in test_vms]}")

        if test_hosts:
            FAILED_TESTS_HOSTS[test_nodeid] = test_hosts
            SESSION_HAS_FAILURES = True
        if test_vms:
            FAILED_TESTS_VMS[test_nodeid] = test_vms

        # Console screenshots are opt-in via the capture_console marker
        if request.node.get_closest_marker("capture_console") is not None:
            logging.debug("Capturing console NOW...")

            # Capture console for each VM immediately (while VM is still alive),
            # into the session-wide log directory
            for vm in test_vms:
                try:
                    os.makedirs(session_log_dir, exist_ok=True)
                    screenshot_path = capture_vm_console(vm, session_log_dir)
                    logging.debug(f"Console captured: {screenshot_path}")
                except Exception as e:
                    logging.error(f"Failed to capture console for VM {vm.uuid}: {e}")

@pytest.fixture(scope='session', autouse=True)
def collect_logs_on_session_failure(request, hosts, session_log_dir):
    """
    Collect xen-bugtool logs from hosts at the end of the test session if any tests failed.

    Only collects from hosts that were actually used by failed tests, falling
    back to all configured hosts when failures were recorded without host
    tracking.  Also downloads the timestamp file written by
    bugreport_timestamp, and removes the bug-report archive from the host
    afterwards.
    """
    yield  # Let all tests run first

    # Check if log collection is enabled
    if not request.config.getoption("--collect-logs-on-failure", default=True):
        return

    if not SESSION_HAS_FAILURES and not FAILED_TESTS_HOSTS:
        return

    # Get unique set of hosts that had failures
    failed_hosts = set()
    for test_hosts in FAILED_TESTS_HOSTS.values():
        failed_hosts.update(test_hosts)

    if not failed_hosts:
        logging.debug("Tests failed but no hosts were tracked. Collecting logs from all configured hosts.")
        failed_hosts = set(hosts)

    # BUGFIX: this message was missing its f-string prefix
    logging.debug(f"Collecting logs from {len(failed_hosts)} host(s) due to test failures...")

    # Use the session-wide log directory so logs are saved alongside console captures
    log_dir = session_log_dir
    os.makedirs(log_dir, exist_ok=True)

    for host in failed_hosts:
        try:
            logging.debug(f"Collecting logs from host {host.hostname_or_ip}...")

            # Run xen-bugtool on the host; it prints the archive path
            result = host.ssh(['xen-bugtool', '-y', '-s'], check=True)
            filepath = result.strip()
            assert filepath.endswith('.tar.bz2'), f"expect a .tar.bz2 archive: {filepath}"

            filename = os.path.basename(filepath)

            # Construct local filename with host identifier
            # (BUGFIX: `filename` was computed but never used in the local name)
            local_filename = f"{host.hostname_or_ip.replace(':', '_')}_{filename}"
            local_path = os.path.join(log_dir, local_filename)

            # Download the archive from host
            logging.debug(f"Downloading {filepath} to {local_path}...")
            host.scp(filepath, local_path, local_dest=True, check=True)

            # Download the timestamp file if it exists (best effort)
            local_timestamp_filename = f"{host.hostname_or_ip.replace(':', '_')}_pytest-timestamps.log"
            local_timestamp_path = os.path.join(log_dir, local_timestamp_filename)
            try:
                host.scp(HOST_TIMESTAMPS_FILE, local_timestamp_path, local_dest=True, check=False)
                logging.debug(f"Timestamp file downloaded to: {local_timestamp_path}")
            except Exception as ts_e:
                logging.debug(f"Could not download timestamp file (may not exist): {ts_e}")

            # Clean up archive from host
            logging.info(f"Cleaning up {filepath} from host...")
            host.ssh(['rm', '-f', filepath])

        except Exception as e:
            logging.error(f"Failed to collect logs from host {host.hostname_or_ip}: {e}")

    logging.info(f"Log collection complete. Logs saved to: {log_dir}")
9 changes: 9 additions & 0 deletions jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,15 @@
"nb_pools": 2,
"params": {},
"paths": ["tests/misc/test_pool.py"],
},
"no_auto": {
"description": "Tests for special purposes",
"requirements": [
"1 XCP-ng host >= 8.2"
],
"nb_pools": 1,
"params": {},
"paths": ["tests/misc/test_log_collection.py"],
}
}

Expand Down
2 changes: 2 additions & 0 deletions lib/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def _ssh(hostname_or_ip, cmd, check, simple_output, suppress_fingerprint_warning
opts.append('-o "StrictHostKeyChecking no"')
opts.append('-o "LogLevel ERROR"')
opts.append('-o "UserKnownHostsFile /dev/null"')
if background:
opts.append('-f')

if isinstance(cmd, str):
command = cmd
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ dependencies = [
"pytest>=8.0.0",
"pytest-dependency",
"requests",
# For console capture
"websockets",
"Pillow",
"asyncvnc2",
]

[dependency-groups]
Expand Down
3 changes: 3 additions & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ pluggy>=1.1.0
pytest>=8.0.0
pytest-dependency
requests
websockets
Pillow
asyncvnc2
Loading