Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
WAIT_TIMEOUT_SECONDS = 60 * 20 # Landscape takes a long time to deploy.


USE_HOST_JUJU_MODEL = os.getenv("LANDSCAPE_CHARM_USE_HOST_JUJU_MODEL", False)
USE_HOST_JUJU_MODEL = bool(os.getenv("LANDSCAPE_CHARM_USE_HOST_JUJU_MODEL"))
"""
If `True`, return a reference to the current Juju model on the host instead of a temporary
model.
"""

USE_HOST_LBAAS_MODEL = os.getenv("LANDSCAPE_CHARM_USE_HOST_LBAAS_MODEL", False)
USE_HOST_LBAAS_MODEL = bool(os.getenv("LANDSCAPE_CHARM_USE_HOST_LBAAS_MODEL"))
"""
If `True`, use existing LBaaS model instead of creating a temporary one.
The model name should be set in `LBAAS_MODEL_NAME` environment variable.
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def wait_for_service(
last_exc: jubilant.CLIError | None = None
while time.monotonic() < deadline:
try:
juju.ssh(unit, f"systemctl is-active {service}.service")
juju.exec(f"systemctl is-active {service}.service", unit=unit)
return
except jubilant.CLIError as e:
last_exc = e
Expand Down
309 changes: 279 additions & 30 deletions tests/integration/test_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pytest

from charm import DEFAULT_SERVICES, LANDSCAPE_UBUNTU_INSTALLER_ATTACH, LEADER_SERVICES
from tests.integration.conftest import USE_HOST_JUJU_MODEL
from tests.integration.helpers import (
get_session,
has_legacy_pg,
Expand Down Expand Up @@ -203,21 +204,25 @@ def test_modern_database_relation(juju: jubilant.Juju, lbaas: jubilant.Juju):
status = juju.status()
initial_relations = set(status.apps["landscape-server"].relations)

if "db" in initial_relations:
juju.remove_relation("landscape-server:db", "postgresql:db-admin", force=True)
juju.wait(lambda status: not has_legacy_pg(juju), timeout=120)

juju.integrate("landscape-server:database", "postgresql:database")
try:
if "db" in initial_relations:
juju.remove_relation(
"landscape-server:db", "postgresql:db-admin", force=True
)
juju.wait(lambda status: not has_legacy_pg(juju), timeout=120)

elif "database" not in initial_relations:
juju.integrate("landscape-server:database", "postgresql:database")
juju.wait(lambda status: has_modern_pg(juju), timeout=120)
juju.integrate("landscape-server:database", "postgresql:database")
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After switching from the legacy db relation to database, this branch calls juju.integrate(...) but doesn’t wait for the modern relation to actually appear before reading juju.status() and asserting. This is inconsistent with the elif branch (which waits for has_modern_pg) and can lead to flaky failures if relation establishment is not immediate. Consider waiting for has_modern_pg(juju) (or jubilant.all_active) after the integrate in this branch as well.

Suggested change
juju.integrate("landscape-server:database", "postgresql:database")
juju.integrate("landscape-server:database", "postgresql:database")
juju.wait(lambda status: has_modern_pg(juju), timeout=120)

Copilot uses AI. Check for mistakes.
juju.wait(lambda status: has_modern_pg(juju), timeout=120)

relations = set(juju.status().apps["landscape-server"].relations)
elif "database" not in initial_relations:
juju.integrate("landscape-server:database", "postgresql:database")
juju.wait(lambda status: has_modern_pg(juju), timeout=120)

assert "database" in relations
relations = set(juju.status().apps["landscape-server"].relations)

restore_db_relations(juju, initial_relations)
assert "database" in relations
finally:
restore_db_relations(juju, initial_relations)


def test_legacy_db_relation(juju: jubilant.Juju, lbaas: jubilant.Juju):
Expand All @@ -230,22 +235,24 @@ def test_legacy_db_relation(juju: jubilant.Juju, lbaas: jubilant.Juju):
status = juju.status()
initial_relations = set(status.apps["landscape-server"].relations)

if "database" in initial_relations:
juju.remove_relation(
"landscape-server:database", "postgresql:database", force=True
)
juju.wait(lambda status: not has_modern_pg(juju), timeout=120)
juju.integrate("landscape-server:db", "postgresql:db-admin")

elif "db" not in initial_relations:
juju.integrate("landscape-server:db", "postgresql:db-admin")
juju.wait(lambda status: has_legacy_pg(juju), timeout=120)
try:
if "database" in initial_relations:
juju.remove_relation(
"landscape-server:database", "postgresql:database", force=True
)
juju.wait(lambda status: not has_modern_pg(juju), timeout=120)
juju.integrate("landscape-server:db", "postgresql:db-admin")
juju.wait(lambda status: has_legacy_pg(juju), timeout=120)

relations = set(juju.status().apps["landscape-server"].relations)
elif "db" not in initial_relations:
juju.integrate("landscape-server:db", "postgresql:db-admin")
juju.wait(lambda status: has_legacy_pg(juju), timeout=120)

assert "db" in relations
relations = set(juju.status().apps["landscape-server"].relations)
Comment on lines +240 to +251
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the branch that removes the modern database relation, the test immediately calls juju.integrate("landscape-server:db", ...) and then proceeds to read status/assert without waiting for has_legacy_pg(juju) (unlike the elif branch). This can be racey/flaky if the relation doesn’t show up in juju.status() immediately. Consider adding a wait for has_legacy_pg(juju) after the integrate in this branch too.

Copilot uses AI. Check for mistakes.

restore_db_relations(juju, initial_relations)
assert "db" in relations
finally:
restore_db_relations(juju, initial_relations)


def test_pgbouncer_relation(juju: jubilant.Juju, bundle: None):
Expand Down Expand Up @@ -301,12 +308,12 @@ def test_landscape_schema_migrated(juju: jubilant.Juju, bundle: None):
host, port = stores["host"].split(":")
password, user, dbname = stores["password"], stores["user"], stores["main"]

result = juju.ssh(
"landscape-server/leader",
result = juju.exec(
f"PGPASSWORD={password} psql -h {host} -p {port} -U {user} -d {dbname}"
' -tAc "SELECT COUNT(*) FROM information_schema.tables'
" WHERE table_schema = 'public' AND table_name = 'account';\"",
).strip()
unit="landscape-server/leader",
).stdout.strip()

assert result == "1", (
"Expected the 'account' table to exist in the landscape database, "
Expand Down Expand Up @@ -401,10 +408,10 @@ def test_ubuntu_installer_attach_toggle_no_maintenance(
assert status.apps["landscape-server"].app_status.current == "active"

for name in status.apps["landscape-server"].units.keys():
with pytest.raises(Exception):
juju.ssh(
name,
with pytest.raises(jubilant.CLIError):
juju.exec(
f"systemctl is-active {LANDSCAPE_UBUNTU_INSTALLER_ATTACH}.service",
unit=name,
)

finally:
Expand Down Expand Up @@ -763,6 +770,11 @@ def test_upgrade_action_updates_ppa(juju: jubilant.Juju, bundle: None):
sources before upgrading, so switching PPAs (ex. upgrade from self-hosted-24.04 to
self-hosted-beta) works correctly.
"""
if USE_HOST_JUJU_MODEL:
pytest.skip(
"test_upgrade_action_updates_ppa mutates PPA state and is not safe "
"to run against a live model. Run in a dedicated upgrade pipeline instead."
)
juju.wait(jubilant.all_active, timeout=300)

landscape_ppa = juju.config("landscape-server").get(
Expand Down Expand Up @@ -799,3 +811,240 @@ def test_upgrade_action_updates_ppa(juju: jubilant.Juju, bundle: None):
juju.run(unit_name, "upgrade")
juju.run(unit_name, "resume")
juju.wait(jubilant.all_active, timeout=300)


# ---------------------------------------------------------------------------
# Service placement and port tests
# ---------------------------------------------------------------------------

# Ports every landscape-server unit must listen on:
# 8080 = appserver, 8070 = pingserver, 8090 = message-server, 9080 = api.
_ALL_UNIT_PORTS = [8080, 8070, 8090, 9080]  # appserver, pingserver, message-server, api
# Ports only the leader unit must listen on:
# 9100 = package-upload, 9099 = package-search.
_LEADER_ONLY_PORTS = [9100, 9099]  # package-upload, package-search


def test_leader_services_not_on_non_leaders(
    juju: jubilant.Juju, lbaas: jubilant.Juju
):
    """
    LEADER_SERVICES (package-search, package-upload) must NOT be active on
    non-leader units. test_all_services_up only asserts the positive side;
    this asserts the negative to catch misconfigured multi-unit deployments.
    """
    status = juju.status()
    units = status.apps["landscape-server"].units

    if len(units) <= 1:
        pytest.skip("Need more than 1 unit to verify non-leader service placement")

    juju.wait(jubilant.all_active, timeout=300)

    # Re-read status after the model has settled: leadership may change while
    # waiting, and stale leader flags would make the negative assertions
    # meaningless. (Matches test_service_ports_listening, which also reads
    # status only after the wait.)
    units = juju.status().apps["landscape-server"].units

    for name, unit_status in units.items():
        if unit_status.leader:
            continue
        for service in LEADER_SERVICES:
            # `systemctl is-active` exits non-zero for inactive/unknown
            # services, which jubilant surfaces as a CLIError.
            with pytest.raises(jubilant.CLIError):
                juju.exec(
                    f"systemctl is-active {service}.service",
                    unit=name,
                )


def test_service_ports_listening(juju: jubilant.Juju, lbaas: jubilant.Juju):
    """
    Verify that services are listening on the expected ports on the expected units.

    All-unit services (appserver:8080, pingserver:8070, message-server:8090,
    api:9080) must be open on every unit. Leader-only services
    (package-upload:9100, package-search:9099) must be open only on the leader.

    This guards against HAProxy charm config changes silently dropping backend
    ports, which previously nearly brought down prod.
    """
    juju.wait(jubilant.all_active, timeout=300)

    status = juju.status()
    units = status.apps["landscape-server"].units

    for name, unit_status in units.items():
        for port in _ALL_UNIT_PORTS:
            # juju.exec returns a task object; the command output lives in
            # .stdout (see test_landscape_schema_migrated, which does the
            # same). Note `ss` exits 0 even with no matches, so exec won't
            # raise — we must inspect the output.
            output = juju.exec(f"ss -tlnH 'sport = :{port}'", unit=name).stdout
            assert str(port) in output, (
                f"Expected port {port} to be listening on {name}, got: {output!r}"
            )

        for port in _LEADER_ONLY_PORTS:
            output = juju.exec(f"ss -tlnH 'sport = :{port}'", unit=name).stdout
            if unit_status.leader:
                assert str(port) in output, (
                    f"Expected leader-only port {port} on leader {name}, "
                    f"got: {output!r}"
                )
            else:
                assert str(port) not in output, (
                    f"Leader-only port {port} must not be open on non-leader {name}"
                )


# ---------------------------------------------------------------------------
# Destructive tests — skipped on live models
# ---------------------------------------------------------------------------


def test_leadership_change_service_follows(
    juju: jubilant.Juju, lbaas: jubilant.Juju
):
    """
    When the current leader unit is removed, a new leader is elected and
    LEADER_SERVICES must start on that new leader while DEFAULT_SERVICES remain
    active across all surviving units.

    Skipped on live models because it removes a unit temporarily.
    """
    if USE_HOST_JUJU_MODEL:
        pytest.skip(
            "test_leadership_change_service_follows removes a unit; "
            "skipped on live model."
        )

    status = juju.status()
    units = status.apps["landscape-server"].units

    if len(units) <= 1:
        pytest.skip("Need more than 1 unit to test leadership change")

    juju.wait(jubilant.all_active, timeout=300)

    original_count = len(units)
    leader_name = next(n for n, u in units.items() if u.leader)

    try:
        juju.cli("remove-unit", leader_name, "--no-wait")

        def new_leader_active(status):
            # True once some surviving unit holds leadership AND its
            # workload is active again.
            app_units = status.apps["landscape-server"].units
            return any(
                u.leader and u.workload_status.current == "active"
                for u in app_units.values()
            )

        juju.wait(new_leader_active, timeout=600)

        new_status = juju.status()
        new_leader = next(
            n
            for n, u in new_status.apps["landscape-server"].units.items()
            if u.leader
        )

        # Leader-only services must have followed leadership to the new leader.
        for service in LEADER_SERVICES:
            wait_for_service(juju, new_leader, service)

        # All surviving units must still run the default service set.
        for name in new_status.apps["landscape-server"].units:
            for service in DEFAULT_SERVICES:
                wait_for_service(juju, name, service)
    finally:
        # Restore the original unit count, but only add units the removal
        # actually took away. Unconditionally adding one (as a naive cleanup
        # would) leaves an extra unit behind whenever the try block failed
        # before — or during — the remove-unit call.
        current_count = len(juju.status().apps["landscape-server"].units)
        if current_count < original_count:
            juju.cli(
                "add-unit",
                "landscape-server",
                "-n",
                str(original_count - current_count),
            )
        juju.wait(jubilant.all_active, timeout=600)


def test_scale_out_and_in(juju: jubilant.Juju, lbaas: jubilant.Juju):
    """
    Adding a unit brings it to active with all DEFAULT_SERVICES running and
    LEADER_SERVICES absent. Removing it leaves the model healthy.

    Skipped on live models.
    """
    if USE_HOST_JUJU_MODEL:
        pytest.skip("test_scale_out_and_in adds/removes units; skipped on live model.")

    juju.wait(jubilant.all_active, timeout=300)

    # Snapshot the unit *names* before scale-out so the new unit can be
    # identified by set difference afterwards. Picking "any non-leader"
    # would select a pre-existing unit in a 3+ unit deployment.
    original_units = set(juju.status().apps["landscape-server"].units)
    original_count = len(original_units)

    try:
        juju.cli("add-unit", "landscape-server", "-n", "1")
        juju.wait(jubilant.all_active, timeout=600)

        units = juju.status().apps["landscape-server"].units
        assert len(units) == original_count + 1, (
            "Expected one extra unit after scale-out"
        )

        added = set(units) - original_units
        assert len(added) == 1, f"Expected exactly one new unit, got: {added}"
        new_unit = added.pop()

        # The new (non-leader) unit must run the default services...
        for service in DEFAULT_SERVICES:
            wait_for_service(juju, new_unit, service)

        # ...but never the leader-only ones (`systemctl is-active` fails,
        # which jubilant raises as CLIError).
        for service in LEADER_SERVICES:
            with pytest.raises(jubilant.CLIError):
                juju.exec(f"systemctl is-active {service}.service", unit=new_unit)

        juju.cli("remove-unit", new_unit, "--no-wait")
        juju.wait(jubilant.all_active, timeout=600)

        assert (
            len(juju.status().apps["landscape-server"].units) == original_count
        ), "Unit count did not return to original after scale-in"
    finally:
        # Best-effort cleanup: remove exactly the units this test added and
        # did not remove (e.g. an assertion failed mid-test), rather than
        # guessing by sort order, which could remove a pre-existing unit.
        leftover = (
            set(juju.status().apps["landscape-server"].units) - original_units
        )
        for name in leftover:
            juju.cli("remove-unit", name, "--no-wait")
        juju.wait(jubilant.all_active, timeout=600)


def test_postgresql_failover_services_recover(
    juju: jubilant.Juju, lbaas: jubilant.Juju
):
    """
    After a PostgreSQL leader failover, landscape-server services must recover
    automatically without manual intervention and continue serving HTTP.

    Skipped on live models; intended for a dedicated failover pipeline.
    """
    if USE_HOST_JUJU_MODEL:
        pytest.skip(
            "test_postgresql_failover_services_recover triggers a pg failover; "
            "skipped on live model. Run in a dedicated failover pipeline."
        )

    # Locate the postgresql application by name prefix (handles suffixed
    # deployments such as "postgresql-inner").
    pg_status = juju.status()
    pg_apps = [a for a in pg_status.apps if a.startswith("postgresql")]
    if not pg_apps:
        pytest.skip("No postgresql app found in model")

    pg_app = pg_apps[0]
    pg_units = pg_status.apps[pg_app].units
    # None when no unit currently holds leadership (e.g. mid-election).
    pg_leader = next(
        (n for n, u in pg_units.items() if u.leader), None
    )
    if pg_leader is None or len(pg_units) <= 1:
        pytest.skip("Need a multi-unit postgresql with a leader to test failover")

    juju.wait(jubilant.all_active, timeout=300)

    # Resolve the HAProxy address up front so the post-failover HTTP check
    # does not depend on model state queried after the disruption.
    haproxy_ip = _haproxy_ip(juju, lbaas)

    try:
        # Stop the database on the pg leader to force a failover.
        # NOTE(review): assumes the service unit is literally "postgresql" —
        # the charmed PostgreSQL operator may run under a snap-managed service
        # name instead; confirm against the deployed charm before relying on
        # this to actually trigger failover.
        juju.exec("sudo systemctl stop postgresql", unit=pg_leader)

        # NOTE(review): assumes the model returns to all-active while the old
        # pg leader's database stays stopped — verify the charm reports active
        # in that state, otherwise this wait can only succeed after a timeout.
        juju.wait(jubilant.all_active, timeout=600)

        # Every landscape-server unit must have its default services running
        # again without manual intervention.
        status = juju.status()
        for name in status.apps["landscape-server"].units:
            for service in DEFAULT_SERVICES:
                wait_for_service(juju, name, service)

        # End-to-end check: the appserver behind HAProxy still answers.
        # verify=False because the test deployment uses a self-signed cert.
        session = get_session()
        response = session.get(f"https://{haproxy_ip}/ping", verify=False, timeout=30)
        assert response.status_code == 200, (
            f"appserver /ping returned {response.status_code} after pg failover"
        )
    finally:
        # Restore the stopped database and let the model settle again.
        juju.exec("sudo systemctl start postgresql", unit=pg_leader)
        juju.wait(jubilant.all_active, timeout=600)
Loading