diff --git a/rollingops/pyproject.toml b/rollingops/pyproject.toml index bd096196b..701b0684b 100644 --- a/rollingops/pyproject.toml +++ b/rollingops/pyproject.toml @@ -19,7 +19,9 @@ dependencies = [ "charmlibs-interfaces-tls-certificates>=1.8.1", "charmlibs-pathops>=1.2.1", "dpcharmlibs-interfaces==1.0.0", - "tenacity" + "tenacity", + "pydantic>=2.12.5", + "shortuuid>=1.0.13", ] [dependency-groups] diff --git a/rollingops/src/charmlibs/rollingops/__init__.py b/rollingops/src/charmlibs/rollingops/__init__.py index 453742a80..181969351 100644 --- a/rollingops/src/charmlibs/rollingops/__init__.py +++ b/rollingops/src/charmlibs/rollingops/__init__.py @@ -14,17 +14,45 @@ """The charmlibs.rollingops package.""" -from ._base_manager import RollingOpsManager -from ._models import ( - OperationResult, +from ._rollingops_manager import RollingOpsManager +from ._version import __version__ as __version__ +from .common._exceptions import ( + RollingOpsDecodingError, + RollingOpsEtcdctlError, RollingOpsEtcdNotConfiguredError, + RollingOpsFileSystemError, RollingOpsInvalidLockRequestError, + RollingOpsInvalidSecretContentError, + RollingOpsLibMissingError, + RollingOpsNoRelationError, + RollingOpsSyncLockError, +) +from .common._models import ( + Operation, + OperationQueue, + OperationResult, + ProcessingBackend, + RollingOpsState, + RollingOpsStatus, + SyncLockBackend, ) -from ._version import __version__ as __version__ __all__ = ( + 'Operation', + 'OperationQueue', 'OperationResult', + 'ProcessingBackend', + 'RollingOpsDecodingError', 'RollingOpsEtcdNotConfiguredError', + 'RollingOpsEtcdctlError', + 'RollingOpsFileSystemError', 'RollingOpsInvalidLockRequestError', + 'RollingOpsInvalidSecretContentError', + 'RollingOpsLibMissingError', 'RollingOpsManager', + 'RollingOpsNoRelationError', + 'RollingOpsState', + 'RollingOpsStatus', + 'RollingOpsSyncLockError', + 'SyncLockBackend', ) diff --git a/rollingops/src/charmlibs/rollingops/_base_manager.py 
b/rollingops/src/charmlibs/rollingops/_base_manager.py deleted file mode 100644 index 3ac9d50e1..000000000 --- a/rollingops/src/charmlibs/rollingops/_base_manager.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""etcd rolling ops. Spawns and manages the external rolling-ops worker process.""" - -import logging -from typing import Any - -from ops import CharmBase, Object -from ops.framework import EventBase - -from charmlibs.rollingops._manager import EtcdRollingOpsManager -from charmlibs.rollingops._peer_manager import PeerRollingOpsManager - -logger = logging.getLogger(__name__) - - -class RollingOpsLockGrantedEvent(EventBase): - """Custom event emitted when the background worker grants the lock.""" - - -class RollingOpsManager(Object): - def __init__( - self, - charm: CharmBase, - peer_relation_name: str, - etcd_relation_name: str, - cluster_id: str, - callback_targets: dict[str, Any], - ): - super().__init__(charm, 'rolling-ops-manager') - - self.charm = charm - self.peer_relation_name = peer_relation_name - self.etcd_relation_name = etcd_relation_name - charm.on.define_event('rollingops_lock_granted', RollingOpsLockGrantedEvent) - - self.peer_manager = PeerRollingOpsManager( - charm=charm, - relation_name=peer_relation_name, - callback_targets=callback_targets, - ) - self.etcd_manager = EtcdRollingOpsManager( - charm=charm, - peer_relation_name=peer_relation_name, - 
etcd_relation_name=etcd_relation_name, - cluster_id=cluster_id, - callback_targets=callback_targets, - ) - - self.framework.observe(charm.on.rollingops_lock_granted, self._on_rollingops_lock_granted) - - def _has_relation(self, relation_name: str) -> bool: - return self.model.get_relation(relation_name) is not None - - def _get_active_manager(self) -> Any: - has_etcd = self._has_relation(self.etcd_relation_name) - has_peer = self._has_relation(self.peer_relation_name) - - if has_etcd: - return self.etcd_manager - - if has_peer: - return self.peer_manager - - raise RuntimeError('No active rollingops relation found.') - - def request_async_lock( - self, callback_id: str, kwargs: dict[str, Any] | None = None, max_retry: int | None = None - ) -> None: - manager = self._get_active_manager() - return manager.request_async_lock( - callback_id=callback_id, kwargs=kwargs, max_retry=max_retry - ) - - def _on_rollingops_lock_granted(self, event: RollingOpsLockGrantedEvent) -> None: - """Handler of the custom hook rollingops_lock_granted. - - The custom hook is triggered by a background process. - """ - manager = self._get_active_manager() - manager._on_rollingops_lock_granted(event) diff --git a/rollingops/src/charmlibs/rollingops/_etcd_rollingops.py b/rollingops/src/charmlibs/rollingops/_etcd_rollingops.py deleted file mode 100644 index 816d7659d..000000000 --- a/rollingops/src/charmlibs/rollingops/_etcd_rollingops.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import subprocess -import time - - -def main(): - """Juju hook event dispatcher.""" - parser = argparse.ArgumentParser() - parser.add_argument('--run-cmd', required=True) - parser.add_argument('--unit-name', required=True) - parser.add_argument('--charm-dir', required=True) - parser.add_argument('--owner', required=True) - args = parser.parse_args() - - time.sleep(10) - - dispatch_sub_cmd = ( - f'JUJU_DISPATCH_PATH=hooks/rollingops_lock_granted {args.charm_dir}/dispatch' - ) - res = subprocess.run([args.run_cmd, '-u', args.unit_name, dispatch_sub_cmd]) - res.check_returncode() - - -if __name__ == '__main__': - main() diff --git a/rollingops/src/charmlibs/rollingops/_manager.py b/rollingops/src/charmlibs/rollingops/_manager.py deleted file mode 100644 index 6497db79f..000000000 --- a/rollingops/src/charmlibs/rollingops/_manager.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from typing import Any - -from ops import Relation -from ops.charm import ( - CharmBase, - RelationBrokenEvent, - RelationCreatedEvent, - RelationDepartedEvent, -) -from ops.framework import EventBase, Object - -from charmlibs.rollingops import _etcdctl as etcdctl -from charmlibs.rollingops._models import ( - RollingOpsEtcdNotConfiguredError, - RollingOpsInvalidLockRequestError, - RollingOpsKeys, - RollingOpsNoEtcdRelationError, -) -from charmlibs.rollingops._relations import EtcdRequiresV1, SharedClientCertificateManager -from charmlibs.rollingops._worker import EtcdRollingOpsAsyncWorker - -logger = logging.getLogger(__name__) - - -class EtcdRollingOpsManager(Object): - """Rolling ops manager for clusters.""" - - def __init__( - self, - charm: CharmBase, - peer_relation_name: str, - etcd_relation_name: str, - cluster_id: str, - callback_targets: dict[str, Any], - ): - """Register our custom events. - - params: - charm: the charm we are attaching this to. - peer_relation_name: peer relation used for rolling ops. - etcd_relation_name: the relation to integrate with etcd. - cluster_id: unique identifier for the cluster - callback_targets: mapping from callback_id -> callable. 
- """ - super().__init__(charm, 'etcd-rolling-ops-manager') - self._charm = charm - self.peer_relation_name = peer_relation_name - self.etcd_relation_name = etcd_relation_name - self.callback_targets = callback_targets - self.charm_dir = charm.charm_dir - - owner = f'{self.model.uuid}-{self.model.unit.name}'.replace('/', '-') - self.worker = EtcdRollingOpsAsyncWorker( - charm, peer_relation_name=peer_relation_name, owner=owner - ) - self.keys = RollingOpsKeys.for_owner(cluster_id, owner) - - self.shared_certificates = SharedClientCertificateManager( - charm, - peer_relation_name=peer_relation_name, - ) - - self.etcd = EtcdRequiresV1( - charm, - relation_name=etcd_relation_name, - cluster_id=self.keys.cluster_prefix, - shared_certificates=self.shared_certificates, - ) - - self.framework.observe( - charm.on[self.peer_relation_name].relation_departed, self._on_peer_relation_departed - ) - self.framework.observe( - charm.on[self.etcd_relation_name].relation_broken, self._on_etcd_relation_broken - ) - self.framework.observe( - charm.on[self.etcd_relation_name].relation_created, self._on_etcd_relation_created - ) - - @property - def _peer_relation(self) -> Relation | None: - """Return the peer relation for this charm.""" - return self.model.get_relation(self.peer_relation_name) - - @property - def _etcd_relation(self) -> Relation | None: - """Return the etcd relation for this charm.""" - return self.model.get_relation(self.etcd_relation_name) - - def _on_etcd_relation_created(self, event: RelationCreatedEvent) -> None: - """Check whether the snap-provided etcdctl command is available.""" - if not etcdctl.is_etcdctl_installed(): - logger.error('%s is not installed', etcdctl.ETCDCTL_CMD) - # TODO: fallback to peer relation implementation. - - def _on_rollingops_lock_granted(self, event: EventBase) -> None: - """Handle the event when a rolling operation lock is granted. - - If etcd is not yet configured, the operation is skipped. 
- """ - if not self._peer_relation or not self._etcd_relation: - # TODO: handle this case. Fallback to peer relation. - return - try: - etcdctl.ensure_initialized() - except RollingOpsEtcdNotConfiguredError: - # TODO: handle this case. Fallback to peer relation. - return - logger.info('Received a rolling-op lock granted event.') - self._on_run_with_lock() - - def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None: - """Handle a unit departing from the peer relation. - - If the current unit is the one departing, stop the etcd worker - process to ensure a clean shutdown. - """ - unit = event.departing_unit - if unit == self.model.unit: - self.worker.stop() - - def _on_etcd_relation_broken(self, event: RelationBrokenEvent) -> None: - """Handle the etcd relation being fully removed. - - This method stops the etcd worker process since the required - relation is no longer available. - """ - self.worker.stop() - - def request_async_lock( - self, - callback_id: str, - kwargs: dict[str, Any] | None = None, - max_retry: int | None = None, - ) -> None: - """This is a dummy function. - - Here we spawn a new process that will trigger a Juju hook. - This function will be completely remade in the next PR. - - Args: - callback_id: Identifier of the registered callback to execute when - the lock is granted. - kwargs: Optional keyword arguments passed to the callback when - executed. Must be JSON-serializable. - max_retry: Maximum number of retries for the operation. - - None: retry indefinitely - - 0: do not retry on failure - - Raises: - RollingOpsInvalidLockRequestError: If the callback_id is not registered or - invalid parameters were provided. - RollingOpsNoEtcdRelationError: if the etcd relation does not exist - RollingOpsEtcdNotConfiguredError: if etcd client has not been configured yet - PebbleConnectionError: if the remote container cannot be reached. - RollingOpsCharmLibMissingError: if the charm libs cannot be found. 
- """ - if callback_id not in self.callback_targets: - raise RollingOpsInvalidLockRequestError(f'Unknown callback_id: {callback_id}') - - if not self._etcd_relation: - raise RollingOpsNoEtcdRelationError - - etcdctl.ensure_initialized() - - # TODO: implement actual lock request - - self.worker.start() - - def _on_run_with_lock(self) -> None: - """This is a dummy function. - - Here we try to reach etcd from each unit. - This function will be completely remade in the next PR. - """ - # TODO: implement the actual execution under lock - etcdctl.run('put', self.keys.lock_key, self.keys.owner) - - result = etcdctl.run('get', self.keys.lock_key, '--print-value-only') - - if result is None: - logger.error('Unexpected response from etcd.') - return - - callback = self.callback_targets.get('_restart', '') - callback(delay=1) diff --git a/rollingops/src/charmlibs/rollingops/_peer_models.py b/rollingops/src/charmlibs/rollingops/_peer_models.py deleted file mode 100644 index b81ff4b3a..000000000 --- a/rollingops/src/charmlibs/rollingops/_peer_models.py +++ /dev/null @@ -1,521 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""peer rolling ops models.""" - -import json -import logging -from collections.abc import Iterator -from dataclasses import dataclass, field -from datetime import UTC, datetime -from enum import StrEnum -from typing import Any - -from ops import Model, RelationDataContent, Unit - -logger = logging.getLogger(__name__) - - -def _now_timestamp_str() -> str: - """UTC timestamp as a string using ISO 8601 format.""" - return datetime.now(UTC).isoformat() - - -def _now_timestamp() -> datetime: - """UTC timestamp.""" - return datetime.now(UTC) - - -def _parse_timestamp(timestamp: str) -> datetime | None: - """Parse timestamp string. Return None on errors to avoid selecting invalid timestamps.""" - try: - return datetime.fromisoformat(timestamp) - except Exception: - return None - - -class RollingOpsNoRelationError(Exception): - """Raised if we are trying to process a lock, but do not appear to have a relation yet.""" - - -class RollingOpsDecodingError(Exception): - """Raised if the content of the databag cannot be processed.""" - - -class RollingOpsInvalidLockRequestError(Exception): - """Raised if the lock request is invalid.""" - - -@dataclass -class Operation: - """A single queued operation.""" - - callback_id: str - requested_at: datetime - max_retry: int | None - attempt: int - kwargs: dict[str, Any] = field(default_factory=dict[str, Any]) - - @classmethod - def _validate_fields( - cls, callback_id: Any, kwargs: Any, requested_at: Any, max_retry: Any, attempt: Any - ) -> None: - """Validate the class attributes.""" - if not isinstance(callback_id, str) or not callback_id.strip(): - raise ValueError('callback_id must be a non-empty string') - - if not isinstance(kwargs, dict): - raise ValueError('kwargs must be a dict') - try: - json.dumps(kwargs) - except TypeError as e: - raise ValueError(f'kwargs must be JSON-serializable: {e}') from e - - if not isinstance(requested_at, datetime): - raise ValueError('requested_at must be a datetime') - - if max_retry is not None: 
- if not isinstance(max_retry, int): - raise ValueError('max_retry must be an int') - if max_retry < 0: - raise ValueError('max_retry must be >= 0') - - if not isinstance(attempt, int): - raise ValueError('attempt must be an int') - if attempt < 0: - raise ValueError('attempt must be >= 0') - - def __post_init__(self) -> None: - """Validate the class attributes.""" - self._validate_fields( - self.callback_id, - self.kwargs, - self.requested_at, - self.max_retry, - self.attempt, - ) - - @classmethod - def create( - cls, - callback_id: str, - kwargs: dict[str, Any], - max_retry: int | None = None, - ) -> 'Operation': - """Create a new operation from a callback id and kwargs.""" - return cls( - callback_id=callback_id, - kwargs=kwargs, - requested_at=_now_timestamp(), - max_retry=max_retry, - attempt=0, - ) - - def _to_dict(self) -> dict[str, str]: - """Dict form (string-only values).""" - return { - 'callback_id': self.callback_id, - 'kwargs': self._kwargs_to_json(), - 'requested_at': self.requested_at.isoformat(), - 'max_retry': '' if self.max_retry is None else str(self.max_retry), - 'attempt': str(self.attempt), - } - - def to_string(self) -> str: - """Serialize to a string suitable for a Juju databag.""" - return json.dumps(self._to_dict(), separators=(',', ':')) - - def increase_attempt(self) -> None: - """Increment the attempt counter.""" - self.attempt += 1 - - def is_max_retry_reached(self) -> bool: - """Return True if attempt exceeds max_retry (unless max_retry is None).""" - if self.max_retry is None: - return False - return self.attempt > self.max_retry - - @classmethod - def from_string(cls, data: str) -> 'Operation': - """Deserialize from a Juju databag string. - - Raises: - RollingOpsDecodingError: if data cannot be deserialized. 
- """ - try: - obj = json.loads(data) - - return cls( - callback_id=obj['callback_id'], - requested_at=_parse_timestamp(obj['requested_at']), # type: ignore[reportArgumentType] - max_retry=int(obj['max_retry']) if obj.get('max_retry') else None, - attempt=int(obj['attempt']), - kwargs=json.loads(obj['kwargs']) if obj.get('kwargs') else {}, - ) - - except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e: - logger.error('Failed to deserialize Operation from %s: %s', data, e) - raise RollingOpsDecodingError( - 'Failed to deserialize data to create an Operation' - ) from e - - def _kwargs_to_json(self) -> str: - """Deterministic JSON serialization for kwargs.""" - return json.dumps(self.kwargs, sort_keys=True, separators=(',', ':')) - - def __eq__(self, other: object) -> bool: - """Equal for the operation.""" - if not isinstance(other, Operation): - return False - return self.callback_id == other.callback_id and self.kwargs == other.kwargs - - def __hash__(self) -> int: - """Hash for the operation.""" - return hash((self.callback_id, self._kwargs_to_json())) - - -class OperationQueue: - """In-memory FIFO queue of Operations with encode/decode helpers for storing in a databag.""" - - def __init__(self, operations: list[Operation] | None = None): - self.operations: list[Operation] = list(operations or []) - - def __len__(self) -> int: - """Return the number of operations in the queue.""" - return len(self.operations) - - @property - def empty(self) -> bool: - """Return True if there are no queued operations.""" - return not self.operations - - def peek(self) -> Operation | None: - """Return the first operation in the queue if it exists.""" - return self.operations[0] if self.operations else None - - def _peek_last(self) -> Operation | None: - """Return the last operation in the queue if it exists.""" - return self.operations[-1] if self.operations else None - - def dequeue(self) -> Operation | None: - """Drop the first operation in the queue if it exists and 
return it.""" - return self.operations.pop(0) if self.operations else None - - def increase_attempt(self) -> None: - """Increment the attempt counter for the head operation and persist it.""" - if self.empty: - return - self.operations[0].increase_attempt() - - def enqueue_lock_request( - self, callback_id: str, kwargs: dict[str, Any], max_retry: int | None = None - ) -> None: - """Append operation only if it is not equal to the last enqueued operation.""" - operation = Operation.create(callback_id, kwargs, max_retry=max_retry) - - last_operation = self._peek_last() - if last_operation is not None and last_operation == operation: - return - self.operations.append(operation) - - def to_string(self) -> str: - """Encode entire queue to a single string.""" - items = [op.to_string() for op in self.operations] - return json.dumps(items, separators=(',', ':')) - - @classmethod - def from_string(cls, data: str) -> 'OperationQueue': - """Decode queue from a string. - - Raises: - RollingOpsDecodingError: if data cannot be deserialized. - """ - if not data: - return cls() - - try: - items = json.loads(data) - except json.JSONDecodeError as e: - logger.error( - 'Failed to deserialize data to create an OperationQueue from %s: %s', data, e - ) - raise RollingOpsDecodingError( - 'Failed to deserialize data to create an OperationQueue.' - ) from e - if not isinstance(items, list) or not all(isinstance(s, str) for s in items): # type: ignore[reportUnknownVariableType] - raise RollingOpsDecodingError( - 'OperationQueue string must decode to a JSON list of strings.' 
- ) - - operations = [Operation.from_string(s) for s in items] # type: ignore[reportUnknownVariableType] - return cls(operations) - - -class LockIntent(StrEnum): - """Unit-level lock intents stored in unit databags.""" - - REQUEST = 'request' - RETRY_RELEASE = 'retry-release' - RETRY_HOLD = 'retry-hold' - IDLE = 'idle' - - -class OperationResult(StrEnum): - """Callback return values.""" - - RELEASE = 'release' - RETRY_RELEASE = 'retry-release' - RETRY_HOLD = 'retry-hold' - - -class Lock: - """State machine view over peer relation databags for a single unit. - - This class is the only component that should directly read/write the peer relation - databags for lock state, queue state, and grant state. - - Important: - - All relation databag values are strings. - - This class updates both unit databags and app databags, which triggers - relation-changed events. - """ - - def __init__(self, model: Model, relation_name: str, unit: Unit): - if not model.get_relation(relation_name): - # TODO: defer caller in this case (probably just fired too soon). - raise RollingOpsNoRelationError() - self.relation = model.get_relation(relation_name) - self.unit = unit - self.app = model.app - - @property - def _app_data(self) -> RelationDataContent: - return self.relation.data[self.app] # type: ignore[reportOptionalMemberAccess] - - @property - def _unit_data(self) -> RelationDataContent: - return self.relation.data[self.unit] # type: ignore[reportOptionalMemberAccess] - - @property - def _operations(self) -> OperationQueue: - return OperationQueue.from_string(self._unit_data.get('operations', '')) - - @property - def _state(self) -> str: - return self._unit_data.get('state', '') - - def request( - self, callback_id: str, kwargs: dict[str, Any], max_retry: int | None = None - ) -> None: - """Enqueue an operation and mark this unit as requesting the lock. - - Args: - callback_id: identifies which callback to execute. - kwargs: dict of callback kwargs. 
- max_retry: None -> unlimited retries, else explicit integer. - """ - queue = self._operations - - previous_length = len(queue) - queue.enqueue_lock_request(callback_id, kwargs, max_retry) - if previous_length == len(queue): - logger.info( - 'Operation %s not added to the queue. It already exists in the back of the queue.', - callback_id, - ) - return - - if len(queue) == 1: - self._unit_data.update({'state': LockIntent.REQUEST}) - - self._unit_data.update({'operations': queue.to_string()}) - logger.info('Operation %s added to the queue.', callback_id) - - def _set_retry(self, intent: LockIntent) -> None: - """Mark the given retry intent on the head operation. - - If max_retry is reached, the head operation is dropped via complete(). - """ - self._increase_attempt() - if self._is_max_retry_reached(): - logger.warning('Operation max retry reached. Dropping.') - self.complete() - return - self._unit_data.update({ - 'executed_at': _now_timestamp_str(), - 'state': intent, - }) - - def retry_release(self) -> None: - """Indicate that the operation should be retried but the lock should be released.""" - self._set_retry(LockIntent.RETRY_RELEASE) - - def retry_hold(self) -> None: - """Indicate that the operation should be retried but the lock should be kept.""" - self._set_retry(LockIntent.RETRY_HOLD) - - def complete(self) -> None: - """Mark the head operation as completed successfully, pop it from the queue. - - Update unit state depending on whether more operations remain. 
- """ - queue = self._operations - queue.dequeue() - next_state = LockIntent.REQUEST if queue.peek() else LockIntent.IDLE - - self._unit_data.update({ - 'state': next_state, - 'operations': queue.to_string(), - 'executed_at': _now_timestamp_str(), - }) - - def release(self) -> None: - """Clear the application-level grant.""" - self._app_data.update({'granted_unit': '', 'granted_at': ''}) - - def grant(self) -> None: - """Grant a lock to a unit.""" - self._app_data.update({ - 'granted_unit': str(self.unit.name), - 'granted_at': _now_timestamp_str(), - }) - - def is_granted(self) -> bool: - """Return True if the unit holds the lock.""" - granted_unit = self._app_data.get('granted_unit', '') - return granted_unit == str(self.unit.name) - - def should_run(self) -> bool: - """Return True if the lock has been granted to the unit and it is time to run.""" - return self.is_granted() and not self._unit_executed_after_grant() - - def should_release(self) -> bool: - """Return True if the unit finished executing the callback and should be released.""" - return self.is_completed() or self._unit_executed_after_grant() - - def is_waiting(self) -> bool: - """Return True if this unit is waiting for a lock to be granted.""" - return self._state == LockIntent.REQUEST and not self.is_granted() - - def is_completed(self) -> bool: - """Return True if this unit is completed callback but still has the grant. - - Transitional state in which the unit is waiting for the leader to release the lock. - """ - return self._state == LockIntent.IDLE and self.is_granted() - - def is_retry(self) -> bool: - """Return True if this unit requested retry but still has the grant. - - Transitional state in which the unit is waiting for the leader to release the lock. 
- """ - unit_intent = self._state - return ( - unit_intent == LockIntent.RETRY_RELEASE or unit_intent == LockIntent.RETRY_HOLD - ) and self.is_granted() - - def is_waiting_retry(self) -> bool: - """Return True if the unit requested retry and is waiting for lock to be granted.""" - return self._state == LockIntent.RETRY_RELEASE and not self.is_granted() - - def is_retry_hold(self) -> bool: - """Return True if the unit requested retry and wants to keep the lock.""" - return self._state == LockIntent.RETRY_HOLD and not self.is_granted() - - def get_current_operation(self) -> Operation | None: - """Return the head operation for this unit, if any.""" - return self._operations.peek() - - def _is_max_retry_reached(self) -> bool: - """Return True if the head operation exceeded its max_retry (unless max_retry is None).""" - if not (operation := self.get_current_operation()): - return True - return operation.is_max_retry_reached() - - def _increase_attempt(self) -> None: - """Increment the attempt counter for the head operation and persist it.""" - q = self._operations - q.increase_attempt() - self._unit_data.update({'operations': q.to_string()}) - - def get_last_completed(self) -> datetime | None: - """Get the time the unit requested a retry of the head operation.""" - if timestamp_str := self._unit_data.get('executed_at', ''): - return _parse_timestamp(timestamp_str) - return None - - def get_requested_at(self) -> datetime | None: - """Get the time the head operation was requested at.""" - if not (operation := self.get_current_operation()): - return None - return operation.requested_at - - def _unit_executed_after_grant(self) -> bool: - """Returns True if the unit executed its callback after the lock was granted.""" - granted_at = _parse_timestamp(self._app_data.get('granted_at', '')) - executed_at = _parse_timestamp(self._unit_data.get('executed_at', '')) - - if granted_at is None or executed_at is None: - return False - return executed_at > granted_at - - -def 
pick_oldest_completed(locks: list[Lock]) -> Lock | None: - """Choose the retry lock with the oldest executed_at timestamp.""" - selected = None - oldest_timestamp = None - - for lock in locks: - timestamp = lock.get_last_completed() - if not timestamp: - continue - - if oldest_timestamp is None or timestamp < oldest_timestamp: - oldest_timestamp = timestamp - selected = lock - - return selected - - -def pick_oldest_request(locks: list[Lock]) -> Lock | None: - """Choose the lock with the oldest head operation.""" - selected = None - oldest_request = None - - for lock in locks: - timestamp = lock.get_requested_at() - if not timestamp: - continue - - if oldest_request is None or timestamp < oldest_request: - oldest_request = timestamp - selected = lock - - return selected - - -class LockIterator: - """Iterator over Lock objects for each unit present on the peer relation.""" - - def __init__(self, model: Model, relation_name: str): - relation = model.relations[relation_name][0] - units = relation.units - units.add(model.unit) - self._model = model - self._units = units - self._relation_name = relation_name - - def __iter__(self) -> Iterator[Lock]: - """Yields a lock for each unit we can find on the relation.""" - for unit in self._units: - yield Lock(self._model, self._relation_name, unit=unit) diff --git a/rollingops/src/charmlibs/rollingops/_peer_worker.py b/rollingops/src/charmlibs/rollingops/_peer_worker.py deleted file mode 100644 index 4a7a54860..000000000 --- a/rollingops/src/charmlibs/rollingops/_peer_worker.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""etcd rolling ops. Spawns and manages the external rolling-ops worker process.""" - -import logging -import os -import signal -import subprocess -from pathlib import Path -from sys import version_info - -from ops import Relation, RelationDataContent -from ops.charm import ( - CharmBase, -) -from ops.framework import Object - -logger = logging.getLogger(__name__) - - -class PeerRollingOpsAsyncWorker(Object): - """Spawns and manages the external rolling-ops worker process.""" - - def __init__(self, charm: CharmBase, relation_name: str): - super().__init__(charm, 'peer-rollingops-async-worker') - self._charm = charm - self._peers_name = relation_name - self._run_cmd = '/usr/bin/juju-exec' - self._charm_dir = charm.charm_dir - - @property - def _relation(self) -> Relation | None: - """Returns the peer relation.""" - return self._charm.model.get_relation(self._peers_name) - - @property - def _app_data(self) -> RelationDataContent: - """Returns the application databag in the peer relation.""" - return self._relation.data[self.model.app] # type: ignore[reportOptionalMemberAccess] - - def start(self) -> None: - """Start a new worker process.""" - if self._relation is None: - return - self.stop() - - # Remove JUJU_CONTEXT_ID so juju-run works from the spawned process - new_env = os.environ.copy() - new_env.pop('JUJU_CONTEXT_ID', None) - - for loc in new_env.get('PYTHONPATH', '').split(':'): - path = Path(loc) - venv_path = ( - path - / '..' 
- / 'venv' - / 'lib' - / f'python{version_info.major}.{version_info.minor}' - / 'site-packages' - ) - if path.stem == 'lib': - new_env['PYTHONPATH'] = f'{venv_path.resolve()}:{new_env["PYTHONPATH"]}' - break - - worker = ( - self._charm_dir - / 'venv' - / 'lib' - / f'python{version_info.major}.{version_info.minor}' - / 'site-packages' - / 'charmlibs' - / 'rollingops' - / '_peer_rollingops.py' - ) - - # These files must stay open for the lifetime of the worker process. - log_out = open('/var/log/peer_rollingops_worker.log', 'a') # noqa: SIM115 - log_err = open('/var/log/peer_rollingops_worker.err', 'a') # noqa: SIM115 - - pid = subprocess.Popen( - [ - '/usr/bin/python3', - '-u', - str(worker), - '--run-cmd', - self._run_cmd, - '--unit-name', - self._charm.model.unit.name, - '--charm-dir', - str(self._charm_dir), - ], - cwd=str(self._charm_dir), - stdout=log_out, - stderr=log_err, - env=new_env, - ).pid - - self._app_data.update({'rollingops-worker-pid': str(pid)}) - logger.info('Started RollingOps worker process with PID %s', pid) - - def stop(self) -> None: - """Stop the running worker process if it exists.""" - if self._relation is None: - return - - if not (pid_str := self._app_data.get('rollingops-worker-pid', '')): - return - - pid = int(pid_str) - try: - os.kill(pid, signal.SIGINT) - logger.info('Stopped RollingOps worker process PID %s', pid) - except OSError: - logger.info('Failed to stop RollingOps worker process PID %s', pid) - - self._app_data.update({'rollingops-worker-pid': ''}) diff --git a/rollingops/src/charmlibs/rollingops/_rollingops_manager.py b/rollingops/src/charmlibs/rollingops/_rollingops_manager.py new file mode 100644 index 000000000..dbd34d1a7 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/_rollingops_manager.py @@ -0,0 +1,465 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common rolling-ops interface coordinating etcd-backed and peer-backed execution.""" + +import logging +from contextlib import contextmanager +from typing import Any + +from ops import CharmBase, Object, Relation, RelationBrokenEvent +from ops.framework import EventBase + +from charmlibs.rollingops.common._exceptions import ( + RollingOpsDecodingError, + RollingOpsInvalidLockRequestError, + RollingOpsNoRelationError, + RollingOpsSyncLockError, +) +from charmlibs.rollingops.common._models import ( + Operation, + OperationQueue, + ProcessingBackend, + RollingOpsState, + RollingOpsStatus, + RunWithLockStatus, + SyncLockBackend, + UnitBackendState, +) +from charmlibs.rollingops.common._utils import ETCD_FAILED_HOOK_NAME, LOCK_GRANTED_HOOK_NAME +from charmlibs.rollingops.etcd._backend import EtcdRollingOpsBackend +from charmlibs.rollingops.peer._backend import PeerRollingOpsBackend +from charmlibs.rollingops.peer._models import PeerUnitOperations + +logger = logging.getLogger(__name__) + + +class RollingOpsLockGrantedEvent(EventBase): + """Custom event emitted when the background worker grants the lock.""" + + +class RollingOpsEtcdFailedEvent(EventBase): + """Custom event emitted when the etcd worker hits a fatal error.""" + + +class RollingOpsManager(Object): + """Coordinate rolling operations across etcd and peer backends. + + This object exposes a common API for queuing asynchronous rolling + operations and acquiring synchronous locks. 
It prefers etcd when + available, mirrors operation state into the peer relation, and falls + back to peer-based processing when etcd becomes unavailable or + inconsistent. + """ + + def __init__( + self, + charm: CharmBase, + peer_relation_name: str, + etcd_relation_name: str, + cluster_id: str, + callback_targets: dict[str, Any], + sync_lock_targets: dict[str, type[SyncLockBackend]] | None = None, + ): + """Create a rolling operations manager with etcd and peer backends. + + This manager coordinates rolling operations across two backends: + + - an etcd-backed backend, used when etcd is available + - a peer-relation-backed backend, used as a fallback + + Operations are always persisted in the peer backend. When etcd is + available, operations are also mirrored to etcd and processed there. + If etcd becomes unavailable or unhealthy, this manager falls back to + the peer backend and continues processing from the mirrored state. + + Args: + charm: The charm instance owning this manager. + peer_relation_name: Name of the peer relation used for fallback + state and operation mirroring. + etcd_relation_name: Name of the relation providing etcd access. + cluster_id: Identifier used to scope etcd-backed state for this + rolling-ops instance. + callback_targets: Mapping of callback identifiers to callables + executed when queued operations are granted the lock. + sync_lock_targets: Optional mapping of sync lock backend + identifiers to backend implementations used when acquiring + synchronous locks through the peer fallback path. 
+ """ + super().__init__(charm, 'rolling-ops-manager') + + self.charm = charm + self.peer_relation_name = peer_relation_name + self.etcd_relation_name = etcd_relation_name + self._sync_lock_targets = sync_lock_targets or {} + charm.on.define_event(LOCK_GRANTED_HOOK_NAME, RollingOpsLockGrantedEvent) + charm.on.define_event(ETCD_FAILED_HOOK_NAME, RollingOpsEtcdFailedEvent) + + self.peer_backend = PeerRollingOpsBackend( + charm=charm, + relation_name=peer_relation_name, + callback_targets=callback_targets, + ) + self.etcd_backend = EtcdRollingOpsBackend( + charm=charm, + peer_relation_name=peer_relation_name, + etcd_relation_name=etcd_relation_name, + cluster_id=cluster_id, + callback_targets=callback_targets, + ) + self.framework.observe( + charm.on[self.etcd_relation_name].relation_broken, self._on_etcd_relation_broken + ) + self.framework.observe(charm.on.rollingops_lock_granted, self._on_rollingops_lock_granted) + self.framework.observe(charm.on.rollingops_etcd_failed, self._on_rollingops_etcd_failed) + self.framework.observe(charm.on.update_status, self._on_update_status) + + @property + def _peer_relation(self) -> Relation | None: + """Return the peer relation for this charm.""" + return self.model.get_relation(self.peer_relation_name) + + @property + def _backend_state(self) -> UnitBackendState: + """Return the backend selection state stored for the current unit. + + This state determines whether the current unit is managed by the etcd + backend or the peer backend, and is used to control fallback and + recovery decisions. + """ + return UnitBackendState(self.model, self.peer_relation_name, self.model.unit) + + def _on_etcd_relation_broken(self, event: RelationBrokenEvent) -> None: + """Handle the etcd relation being fully removed. + + This method stops the etcd worker process since the required + relation is no longer available. 
+ """ + self._fallback_current_unit_to_peer() + + def _select_processing_backend(self) -> ProcessingBackend: + """Choose which backend should handle new operations for this unit. + + Etcd is preferred when available, but a unit that has fallen back to + peer remains peer-managed until its pending peer work is drained. + This ensures backend transitions happen only from a clean state. + + Returns: + The selected processing backend. + """ + if not self.etcd_backend.is_available(): + logger.info('etcd backend unavailable; selecting peer backend.') + return ProcessingBackend.PEER + + if self._backend_state.is_peer_managed() and not self.peer_backend.has_pending_work(): + logger.info('etcd backend is available. Switching to etcd backend.') + return ProcessingBackend.ETCD + + if self._backend_state.is_etcd_managed(): + logger.info('etcd backend selected.') + return ProcessingBackend.ETCD + + logger.info('peer backend selected.') + return ProcessingBackend.PEER + + def _fallback_current_unit_to_peer(self) -> None: + """Move the current unit to the peer backend and resume processing there. + + This method marks the unit as peer-managed, stops the etcd worker, + and ensures that peer-based processing is running. + + It is used when etcd becomes unavailable, unhealthy, or inconsistent, + so that queued operations can continue without being lost. + """ + self._backend_state.fallback_to_peer() + self.etcd_backend.worker.stop() + self.peer_backend.ensure_processing() + + def request_async_lock( + self, + callback_id: str, + kwargs: dict[str, Any] | None = None, + max_retry: int | None = None, + ) -> None: + """Queue a rolling operation and trigger processing on the active backend. + + A new operation is created and always persisted in the peer backend. + If etcd is currently selected as the processing backend, the operation + is also mirrored to etcd and processing is triggered there. + + If persisting to etcd fails, the manager falls back to peer-based + processing. 
This guarantees that operations remain schedulable even + when etcd is unavailable. + + Args: + callback_id: Identifier of the callback to execute when the + operation is granted the rolling lock. + kwargs: Optional keyword arguments passed to the callback target. + max_retry: Optional maximum number of retries allowed for the + operation. None means infinte retries. + + Raises: + RollingOpsInvalidLockRequestError: If the callback identifier is + unknown, the operation cannot be created, or it cannot be + persisted in the peer backend. + RollingOpsNoRelationError: If the peer relation is not available. + """ + if callback_id not in self.peer_backend.callback_targets: + raise RollingOpsInvalidLockRequestError(f'Unknown callback_id: {callback_id}') + + if not self._peer_relation: + raise RollingOpsNoRelationError('No %s peer relation yet.', self.peer_relation_name) + + if kwargs is None: + kwargs = {} + + backend = self._select_processing_backend() + + try: + operation = Operation.create(callback_id, kwargs, max_retry) + except (RollingOpsDecodingError, ValueError) as e: + logger.error('Failed to create operation: %s', e) + raise RollingOpsInvalidLockRequestError('Failed to create the lock request') from e + + try: + self.peer_backend.enqueue_operation(operation) + except (RollingOpsDecodingError, ValueError) as e: + logger.error('Failed to persists operation in peer backend: %s', e) + raise RollingOpsInvalidLockRequestError( + 'Failed to persists operation in peer backend.' 
+ ) from e + + if backend == ProcessingBackend.ETCD: + try: + self.etcd_backend.enqueue_operation(operation) + except Exception as e: + logger.warning( + 'Failed to persist operation in etcd backend; falling back to peer: %s', + e, + ) + backend = ProcessingBackend.PEER + + if backend == ProcessingBackend.ETCD: + self.etcd_backend.ensure_processing() + else: + self._fallback_current_unit_to_peer() + + def _on_rollingops_lock_granted(self, event: RollingOpsLockGrantedEvent) -> None: + """Handle a granted rolling lock and dispatch execution to the active backend. + + If the current unit is peer-managed, the operation is executed through + the peer backend. + + If the current unit is etcd-managed, the operation is executed through + the etcd backend. + """ + if self._backend_state.is_peer_managed(): + logger.info('Executing rollingop on peer backend.') + self.peer_backend._on_rollingops_lock_granted(event) + return + self._run_etcd_and_mirror_or_fallback() + + def _run_etcd_and_mirror_or_fallback(self) -> None: + """Run the etcd execution path and mirror its outcome to peer. + + On successful execution, the result is mirrored back + to the peer relation so that peer state remains consistent and can be + used for fallback. + + If etcd execution fails or mirrored state becomes inconsistent, the + manager falls back to the peer backend and resumes processing there. + """ + try: + logger.info('Executing rollingop on etcd backend.') + outcome = self.etcd_backend._on_run_with_lock() + except Exception as e: + logger.warning( + 'etcd backend failed while handling rollingops_lock_granted; ' + 'falling back to peer: %s', + e, + ) + self._fallback_current_unit_to_peer() + return + + try: + self.peer_backend.mirror_outcome(outcome) + except RollingOpsDecodingError: + logger.info( + 'Inconsistencies found between peer relation and etcd. ' + 'Falling back to peer backend.' 
+ ) + self._fallback_current_unit_to_peer() + return + logger.info('Execution mirrored to peer relation.') + if outcome.status == RunWithLockStatus.EXECUTED_NOT_COMMITTED: + self._fallback_current_unit_to_peer() + logger.info('Fell back to peer backend.') + + def _on_rollingops_etcd_failed(self, event: RollingOpsEtcdFailedEvent) -> None: + """Fall back to peer when the etcd worker reports a fatal failure.""" + logger.warning('Received %s.', ETCD_FAILED_HOOK_NAME) + if self._backend_state.is_etcd_managed(): + # No need to stop the background process. This hook means that it stopped. + self._backend_state.fallback_to_peer() + self.peer_backend.ensure_processing() + logger.info('Fell back to peer backend.') + + def _get_sync_lock_backend(self, backend_id: str) -> SyncLockBackend: + """Instantiate the configured peer sync lock backend. + + Args: + backend_id: Identifier of the configured sync lock backend. + + Returns: + A new sync lock backend instance. + + Raises: + RollingOpsSyncLockError: If no backend is registered for + the given identifier. + """ + backend_cls = self._sync_lock_targets.get(backend_id, None) + if backend_cls is None: + raise RollingOpsSyncLockError(f'Unknown sync lock backend: {backend_id}.') + + return backend_cls() + + @contextmanager + def acquire_sync_lock(self, backend_id: str, timeout: int): + """Acquire a synchronous lock, using etcd when available and peer as fallback. + + This context manager first attempts to acquire the lock through the + etcd backend. If etcd is available and the lock is acquired, the + protected block is executed under the etcd lock. + + If etcd fails due to an operational error, the manager falls back to + the configured peer sync lock backend identified by `backend_id`. + If etcd acquisition times out, the timeout is propagated and no + fallback occurs. + + On context exit, the acquired lock is released through the backend + that granted it. 
+ + Args: + backend_id: Identifier of the peer sync lock backend to use if + etcd acquisition cannot be used. + timeout: Maximum time in seconds to wait for lock acquisition. + None means infinite time. + + Yields: + None. The protected code runs while the lock is held. + + Raises: + TimeoutError: If lock acquisition through etcd or the peer backend + times out. + RollingOpsSyncLockError: if there is an error when acquiring the lock. + """ + if self.etcd_backend.is_available(): + logger.info('Acquiring sync lock on etcd.') + try: + self.etcd_backend.acquire_sync_lock(timeout) + yield + return + except TimeoutError: + raise + except Exception as e: + # etcd is not reachable or unhealthy + logger.exception( + 'Failed to request etcd sync lock; falling back to peer: %s', + e, + ) + finally: + try: + self.etcd_backend.release_sync_lock() + logger.info('etcd lock released.') + except Exception as e: + logger.exception('Failed to release sync lock: %s', e) + + backend = self._get_sync_lock_backend(backend_id) + logger.info('Acquiring sync lock backend %s.', backend_id) + try: + backend.acquire(timeout=timeout) + except Exception as e: + raise RollingOpsSyncLockError( + f'Failed to acquire sync lock backend {backend_id}' + ) from e + + try: + yield + finally: + try: + backend.release() + logger.info('Sync lock backend %s released.', backend_id) + except Exception as e: + raise RollingOpsSyncLockError( + f'Failed to release sync lock backend {backend_id}' + ) from e + + @property + def state(self) -> RollingOpsState: + """Return the current rolling-ops state for this unit. + + The returned state is always based on the peer relation for the + operation queue, since peer state is the durable fallback source of + truth. + + Status is taken from the etcd backend when this unit is currently + etcd-managed. If status retrieval from etcd fails, the unit falls + back to the peer backend and peer status is returned instead. 
+ + Returns: + A snapshot of the current rolling-ops status, backend selection, + and queued operations for this unit. + """ + if self._peer_relation is None: + return RollingOpsState( + status=RollingOpsStatus.UNAVAILABLE, + processing_backend=ProcessingBackend.PEER, + operations=OperationQueue(), + ) + + status = self.peer_backend.get_status() + if self._backend_state.is_etcd_managed(): + status = self.etcd_backend.get_status() + if status == RollingOpsStatus.UNAVAILABLE: + logger.info('etcd backend is not available. Falling back to peer backend.') + self._fallback_current_unit_to_peer() + status = self.peer_backend.get_status() + + operations = PeerUnitOperations(self.model, self.peer_relation_name, self.model.unit) + return RollingOpsState( + status=status, + processing_backend=self._backend_state.backend, + operations=operations.queue, + ) + + def _on_update_status(self, event: EventBase) -> None: + """Periodic reconciliation of rolling-ops state.""" + logger.info('Received a update-status event.') + if self._backend_state.is_etcd_managed(): + if not self.etcd_backend.is_available(): + logger.warning('etcd unavailable during update_status; falling back.') + self._fallback_current_unit_to_peer() + return + + if not self.etcd_backend.is_processing(): + logger.warning( + 'etcd backend is selected but no worker process is running; falling back.' + ) + self._fallback_current_unit_to_peer() + return + + self._run_etcd_and_mirror_or_fallback() + return + + self.peer_backend._on_rollingops_lock_granted(event) diff --git a/rollingops/src/charmlibs/rollingops/_worker.py b/rollingops/src/charmlibs/rollingops/_worker.py deleted file mode 100644 index 9b5430280..000000000 --- a/rollingops/src/charmlibs/rollingops/_worker.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""etcd rolling ops. Spawns and manages the external rolling-ops worker process.""" - -import logging -import os -import signal -import subprocess -from sys import version_info - -from ops import Relation -from ops.charm import CharmBase -from ops.framework import Object - -from charmlibs import pathops -from charmlibs.rollingops._models import RollingOpsCharmLibMissingError, with_pebble_retry - -logger = logging.getLogger(__name__) - -WORKER_PID_FIELD = 'etcd-rollingops-worker-pid' - - -class EtcdRollingOpsAsyncWorker(Object): - """Spawns and manages the external rolling-ops worker process.""" - - def __init__(self, charm: CharmBase, peer_relation_name: str, owner: str): - super().__init__(charm, 'etcd-rollingops-async-worker') - self._charm = charm - self._peer_relation_name = peer_relation_name - self._run_cmd = '/usr/bin/juju-exec' - self._owner = owner - self._charm_dir = charm.charm_dir - - @property - def _relation(self) -> Relation | None: - return self.model.get_relation(self._peer_relation_name) - - def start(self) -> None: - """Start a new worker process. - - Raises: - RollingOpsCharmLibMissingError: if the lib files cannot be found. 
- """ - if self._relation is None: - return - - if pid_str := self._relation.data[self.model.unit].get(WORKER_PID_FIELD): - try: - pid = int(pid_str) - except (ValueError, TypeError): - pid = None - - if pid is not None and self._is_pid_alive(pid): - logger.info( - 'RollingOps worker already running with PID %s; not starting a new one.', pid - ) - return - - # Remove JUJU_CONTEXT_ID so juju-run works from the spawned process - new_env = os.environ.copy() - new_env.pop('JUJU_CONTEXT_ID', None) - - venv_path = ( - self._charm_dir - / 'venv' - / 'lib' - / f'python{version_info.major}.{version_info.minor}' - / 'site-packages' - ) - if not with_pebble_retry(lambda: venv_path.exists()): - raise RollingOpsCharmLibMissingError( - f'Expected virtualenv site-packages not found: {venv_path}' - ) - - for loc in new_env.get('PYTHONPATH', '').split(':'): - path = pathops.LocalPath(loc) - - if path.stem != 'lib': - continue - new_env['PYTHONPATH'] = f'{venv_path.resolve()}:{new_env["PYTHONPATH"]}' - break - - worker = venv_path / 'charmlibs' / 'rollingops' / '_etcd_rollingops.py' - if not with_pebble_retry(lambda: worker.exists()): - raise RollingOpsCharmLibMissingError(f'Worker script not found: {worker}') - - # These files must stay open for the lifetime of the worker process. 
- log_out = open('/var/log/etcd_rollingops_worker.log', 'a') # noqa: SIM115 - log_err = open('/var/log/etcd_rollingops_worker.err', 'a') # noqa: SIM115 - - pid = subprocess.Popen( - [ - '/usr/bin/python3', - '-u', - str(worker), - '--run-cmd', - self._run_cmd, - '--unit-name', - self.model.unit.name, - '--charm-dir', - str(self._charm_dir), - '--owner', - self._owner, - ], - cwd=str(self._charm_dir), - stdout=log_out, - stderr=log_err, - env=new_env, - ).pid - - self._relation.data[self.model.unit].update({WORKER_PID_FIELD: str(pid)}) - logger.info('Started etcd rollingops worker process with PID %s', pid) - - def _is_pid_alive(self, pid: int) -> bool: - if pid <= 0: - return False - try: - os.kill(pid, 0) - return True - except ProcessLookupError: - return False - except PermissionError: - return True - - def stop(self) -> None: - """Stop the running worker process if it exists.""" - if self._relation is None: - return - - pid_str = self._relation.data[self.model.unit].get(WORKER_PID_FIELD, '') - - try: - pid = int(pid_str) - except (TypeError, ValueError): - logger.info('Missing PID or invalid PID found in the databag.') - self._relation.data[self.model.unit].update({WORKER_PID_FIELD: ''}) - return - - try: - os.kill(pid, signal.SIGTERM) - logger.info('Sent SIGTERM to etcd rollingops worker process PID %s.', pid) - except ProcessLookupError: - logger.info('Process PID %s is already gone.', pid) - except PermissionError: - logger.warning('No permission to stop etcd rollingops worker process PID %s.', pid) - return - except OSError: - logger.warning('SIGTERM failed for PID %s, attempting SIGKILL', pid) - try: - os.kill(pid, signal.SIGKILL) - logger.info('Sent SIGKILL to etcd rollingops worker process PID %s', pid) - except ProcessLookupError: - logger.info('Process PID %s exited before SIGKILL', pid) - except PermissionError: - logger.warning('No permission to SIGKILL process PID %s', pid) - return - except OSError: - logger.warning('Failed to SIGKILL process PID 
%s', pid) - return - - self._relation.data[self.model.unit].update({WORKER_PID_FIELD: ''}) diff --git a/rollingops/src/charmlibs/rollingops/common/__init__.py b/rollingops/src/charmlibs/rollingops/common/__init__.py new file mode 100644 index 000000000..33bb77934 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/common/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common code used by peer and etcd rolling ops.""" diff --git a/rollingops/src/charmlibs/rollingops/common/_base_worker.py b/rollingops/src/charmlibs/rollingops/common/_base_worker.py new file mode 100644 index 000000000..c444b344d --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/common/_base_worker.py @@ -0,0 +1,279 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Common class to manager background processes.""" + +import logging +import os +import signal +import subprocess +from sys import version_info + +from ops import CharmBase, Object, Relation + +from charmlibs import pathops +from charmlibs.rollingops.common._exceptions import RollingOpsLibMissingError +from charmlibs.rollingops.common._utils import with_pebble_retry + +logger = logging.getLogger(__name__) + + +class BaseRollingOpsAsyncWorker(Object): + """Base class for external rolling-ops worker processes. + + This class provides the common lifecycle management for background + worker processes used by rolling-ops backends. It is responsible for: + + - locating the worker script inside the charm virtualenv + - building the execution environment for the subprocess + - validating required files before startup + - starting and stopping the worker process + - persisting and retrieving the worker PID through backend-specific storage + + Subclasses define where worker state is stored, how existing workers + should be handled, and which worker script and arguments should be used. + """ + + _pid_field: str + _log_filename: str + + def __init__(self, charm: CharmBase, handle_name: str, peer_relation_name: str): + """Initialize the base rolling-ops worker helper. + + Args: + charm: The charm instance managing the worker process. + handle_name: Framework handle name used for this worker object. + peer_relation_name: Name of the peer relation used by subclasses + to store and retrieve worker state. + """ + super().__init__(charm, handle_name) + self._charm = charm + self._charm_dir = charm.charm_dir + self._peer_relation_name = peer_relation_name + self._handle_name = handle_name + + @property + def _relation(self) -> Relation | None: + """Return the peer relation used for worker state.""" + return self._charm.model.get_relation(self._peer_relation_name) + + def _venv_site_packages(self) -> pathops.LocalPath: + """Return the site-packages path for the charm virtualenv. 
+ + This path is used to locate the rolling-ops worker scripts and ensure + the spawned subprocess can import charm library code. + """ + return pathops.LocalPath( + self._charm_dir + / 'venv' + / 'lib' + / f'python{version_info.major}.{version_info.minor}' + / 'site-packages' + ) + + def _build_env(self) -> dict[str, str]: + """Build the environment used to spawn the worker subprocess. + + The worker runs outside the current Juju hook context, so the Juju + context identifier is removed from the environment. The charm virtualenv + site-packages path is also prepended to ``PYTHONPATH`` so that the + worker can import charm libraries correctly. + + Returns: + A copy of the current environment adjusted for the worker process. + """ + new_env = os.environ.copy() + new_env.pop('JUJU_CONTEXT_ID', None) + + venv_path = self._venv_site_packages() + + for loc in new_env.get('PYTHONPATH', '').split(':'): + path = pathops.LocalPath(loc) + + if path.stem != 'lib': + continue + new_env['PYTHONPATH'] = f'{venv_path.resolve()}:{new_env["PYTHONPATH"]}' + break + return new_env + + def _worker_script_path(self) -> pathops.LocalPath: + """Return the worker script path.""" + raise NotImplementedError + + def _worker_args(self) -> list[str]: + """Return additional backend-specific command-line arguments. + + Subclasses may override this to pass extra arguments required by the + worker process. + + Returns: + A list of command-line arguments to append when starting the worker. + """ + return [] + + @property + def _pid(self) -> int | None: + """Return the stored worker PID. + + Returns: + The stored PID, None if no PID is stored. + + Raises: + NotImplementedError: If not implemented by a subclass. + """ + raise NotImplementedError + + @_pid.setter + def _pid(self, value: int | None) -> None: + """Persist the worker PID string. + + Args: + value: The PID string to persist. An empty string clears the stored PID. + + Raises: + NotImplementedError: If not implemented by a subclass. 
+ """ + raise NotImplementedError + + def _on_existing_worker(self, pid: int) -> bool: + """Handle case where a worker is already running. + + Returns: + True if a new worker should be started, + False if start() should return early. + """ + raise NotImplementedError + + def _validate_startup_paths(self) -> None: + """Validate that the worker runtime files exist before startup. + + This checks that the charm virtualenv site-packages directory exists + and that the backend-specific worker script is present. + + Raises: + RollingOpsLibMissingError: If the virtualenv or worker script + cannot be found. + """ + venv_path = self._venv_site_packages() + if not with_pebble_retry(lambda: venv_path.exists()): + raise RollingOpsLibMissingError( + f'Expected virtualenv site-packages not found: {venv_path}' + ) + + worker = self._worker_script_path() + if not with_pebble_retry(lambda: worker.exists()): + raise RollingOpsLibMissingError(f'Worker script not found: {worker}') + + def _is_pid_alive(self, pid: int) -> bool: + """Return whether the given PID appears to be alive.""" + if pid <= 0: + return False + try: + os.kill(pid, 0) + return True + except ProcessLookupError: + return False + except PermissionError: + return True + + def start(self) -> None: + """Start the worker subprocess if one is not already running. + + Raises: + RollingOpsLibMissingError: If the virtualenv or worker script + required to start the worker is missing. + OSError: If the worker subprocess cannot be started. + """ + if self._relation is None: + logger.info('Peer relation does not exist. 
Worker cannot start.') + return + pid = self._pid + if pid is not None and self._is_pid_alive(pid) and not self._on_existing_worker(pid): + return + + self._validate_startup_paths() + + worker = self._worker_script_path() + env = self._build_env() + + with open(f'{self._log_filename}', 'a') as log_out: + pid = subprocess.Popen( + [ + '/usr/bin/python3', + '-u', + str(worker), + '--unit-name', + self.model.unit.name, + '--charm-dir', + str(self._charm_dir), + *self._worker_args(), + ], + cwd=str(self._charm_dir), + stdout=log_out, + stderr=log_out, + env=env, + ).pid + + self._pid = pid + logger.info('Started %s process with PID %s', self._handle_name, pid) + + def stop(self) -> None: + """Stop the running worker subprocess, if one is recorded. + + This method reads the stored PID, sends ``SIGTERM`` to the process, + and falls back to ``SIGKILL`` if termination fails. If the process is + already gone or the stored PID is invalid, worker state is cleaned up. + + The stored PID is cleared when the worker is successfully considered + stopped or no longer present. + """ + if self._relation is None: + logger.info('Peer relation not found. Worker cannot be stopped.') + return + + pid = self._pid + if pid is None or pid <= 0: + logger.info('Invalid PID found. 
Worker cannot be stopped.') + return + + try: + os.kill(pid, signal.SIGTERM) + logger.info('Sent SIGTERM to rollingops worker process PID %s.', pid) + except ProcessLookupError: + logger.info('Process PID %s is already gone.', pid) + except PermissionError: + logger.warning('No permission to stop rollingops worker process PID %s.', pid) + return + except OSError: + logger.warning('SIGTERM failed for PID %s, attempting SIGKILL', pid) + try: + os.kill(pid, signal.SIGKILL) + logger.info('Sent SIGKILL to rollingops worker process PID %s', pid) + except ProcessLookupError: + logger.info('Process PID %s exited before SIGKILL', pid) + except PermissionError: + logger.warning('No permission to SIGKILL process PID %s', pid) + return + except OSError: + logger.warning('Failed to SIGKILL process PID %s', pid) + return + + self._pid = None + + def is_running(self) -> bool: + """Return whether the recorded worker process appears to be alive.""" + pid = self._pid + if pid is None: + return False + return self._is_pid_alive(pid) diff --git a/rollingops/src/charmlibs/rollingops/common/_exceptions.py b/rollingops/src/charmlibs/rollingops/common/_exceptions.py new file mode 100644 index 000000000..9dd97c9d2 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/common/_exceptions.py @@ -0,0 +1,75 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Exceptions used in rollingops.""" + + +class RollingOpsError(Exception): + """General rollingops error.""" + + +class RollingOpsNoRelationError(RollingOpsError): + """Raised if we are trying to process a lock, but do not appear to have a relation yet.""" + + +class RollingOpsNoEtcdRelationError(RollingOpsNoRelationError): + """Raised if we are trying to process a lock, but the etcd relation does not appear to exist yet.""" + + +class RollingOpsFileSystemError(RollingOpsError): + """Raised if there is a problem when interacting with the filesystem.""" + + +class RollingOpsInvalidLockRequestError(RollingOpsError): + """Raised if the lock request is invalid.""" + + +class RollingOpsDecodingError(RollingOpsError): + """Raised if json content cannot be processed.""" + + +class RollingOpsInvalidSecretContentError(RollingOpsError): + """Raised if the content of a secret is invalid.""" + + +class RollingOpsLibMissingError(RollingOpsError): + """Raised if the path to the libraries cannot be resolved.""" + + +class RollingOpsEtcdctlError(RollingOpsError): + """Base exception for etcdctl command failures.""" + + +class RollingOpsEtcdctlRetryableError(RollingOpsEtcdctlError): + """A transient etcdctl failure that may succeed on retry.""" + + +class RollingOpsEtcdNotConfiguredError(RollingOpsEtcdctlError): + """Raised if etcd client has not been configured yet (env file does not exist).""" + + +class RollingOpsEtcdctlFatalError(RollingOpsEtcdctlError): + """A non-retryable etcdctl failure.""" + + +class RollingOpsEtcdctlParseError(RollingOpsEtcdctlError): + """Raised when etcdctl output cannot be parsed.""" + + +class RollingOpsSyncLockError(RollingOpsError): + """Raised when there is an error during sync lock execution.""" + + +class RollingOpsEtcdTransactionError(RollingOpsError): + """Raised when an etcd transaction fails.""" diff --git a/rollingops/src/charmlibs/rollingops/common/_models.py b/rollingops/src/charmlibs/rollingops/common/_models.py new file mode 100644 index 
000000000..7b15a1fd0 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/common/_models.py @@ -0,0 +1,499 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rolling ops common models.""" + +import json +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import datetime +from enum import StrEnum +from typing import Any + +from ops import Model, Unit +from pydantic import ( + BaseModel, + ConfigDict, + Field, + RootModel, + field_serializer, + field_validator, +) + +from charmlibs.rollingops.common._exceptions import ( + RollingOpsDecodingError, + RollingOpsNoRelationError, +) +from charmlibs.rollingops.common._utils import datetime_to_str, now_timestamp, parse_timestamp + +logger = logging.getLogger(__name__) + + +class OperationResult(StrEnum): + """Result values returned by rolling-ops callbacks on async locks. + + These values control how the rolling-ops manager updates the operation + state and whether the distributed lock is released or retained. + + - RELEASE: + The operation completed successfully and no retry is required. + The lock is released and the next unit may be scheduled. + + - RETRY_RELEASE: + The operation failed or timed out and should be retried later. + The operation is re-queued and the lock is released so that + other units may proceed before this operation is retried. 
+ + - RETRY_HOLD: + The operation failed or timed out and should be retried immediately. + The operation is re-queued and the lock is kept by the current + unit, allowing it to retry immediately. + """ + + RELEASE = 'release' + RETRY_RELEASE = 'retry-release' + RETRY_HOLD = 'retry-hold' + + +class ProcessingBackend(StrEnum): + """Backend responsible for processing a unit's queue.""" + + PEER = 'peer' + ETCD = 'etcd' + + +class RunWithLockStatus(StrEnum): + """Status of an attempt to execute an operation under a distributed lock. + + These values describe what happened when a unit tried to run an + operation while interacting with the lock. + """ + + NOT_GRANTED = 'not_granted' + NO_OPERATION = 'no_operation' + MISSING_CALLBACK = 'missing_callback' + EXECUTED = 'executed' + EXECUTED_NOT_COMMITTED = 'executed_not_committed' + + +class RollingOpsStatus(StrEnum): + """High-level rolling-ops status for a unit. + + It reflects whether the unit is currently executing work, waiting + for execution, idle, or unable to participate. + + States: + + - UNAVAILABLE: + Rolling-ops cannot be used on this unit. This typically occurs when + required relations are missing or the selected backend is not reachable. + * peer backend: peer relation does not exist + * etcd backend: peer or etcd relation missing, or etcd not reachable + + - WAITING: + The unit has pending operations but does not currently hold the lock. + + - GRANTED: + The unit currently holds the lock and may execute operations. + + - IDLE: + The unit has no pending operations and is not holding the lock. + """ + + UNAVAILABLE = 'unavailable' + WAITING = 'waiting' + GRANTED = 'granted' + IDLE = 'idle' + + +@dataclass(frozen=True) +class RunWithLockOutcome: + """Result of attempting to execute an operation under a distributed lock. + + This object captures both whether an operation was executed and, if so, + the identity and result of that operation. It is used to propagate + execution outcomes across backends (e.g. 
etcd → peer mirroring). + """ + + status: RunWithLockStatus + op_id: str | None = None + result: OperationResult | None = None + + +@dataclass +class BackendState: + """Unit-scoped backend ownership and recovery state.""" + + processing_backend: str = ProcessingBackend.PEER + etcd_cleanup_needed: str = 'false' + + @property + def cleanup_needed(self) -> bool: + """Return whether stale etcd state must be cleaned before reuse.""" + return self.etcd_cleanup_needed == 'true' + + @cleanup_needed.setter + def cleanup_needed(self, value: bool) -> None: + """Persist whether stale etcd state cleanup is required.""" + self.etcd_cleanup_needed = 'true' if value else 'false' + + @property + def backend(self) -> ProcessingBackend: + """Return which backend owns execution for this unit's queue.""" + if not self.processing_backend: + return ProcessingBackend.PEER + return ProcessingBackend(self.processing_backend) + + @backend.setter + def backend(self, value: ProcessingBackend) -> None: + """Persist the backend owner.""" + self.processing_backend = value + + +class UnitBackendState: + """Manage backend ownership and fallback state for one unit queue.""" + + def __init__(self, model: Model, relation_name: str, unit: Unit): + relation = model.get_relation(relation_name) + if relation is None: + raise RollingOpsNoRelationError() + + self._relation = relation + self.unit = unit + + self._backend_state = self._relation.load(BackendState, self.unit, decoder=lambda s: s) + + def _save(self, data: BackendState) -> None: + self._relation.save(data, self.unit, encoder=str) + + @property + def backend(self) -> ProcessingBackend: + """Return which backend owns execution for this unit's queue.""" + return self._backend_state.backend + + @property + def cleanup_needed(self) -> bool: + """Return whether etcd cleanup is required before etcd can be reused.""" + return self._backend_state.cleanup_needed + + def fallback_to_peer(self) -> None: + """Switch this unit's queue to peer processing and 
mark etcd cleanup needed.""" + self._backend_state.backend = ProcessingBackend.PEER + self._backend_state.cleanup_needed = True + self._save(self._backend_state) + + def clear_fallback(self) -> None: + """Clear the etcd cleanup-needed flag and set the backend to ETCD.""" + self._backend_state.backend = ProcessingBackend.ETCD + self._backend_state.cleanup_needed = False + self._save(self._backend_state) + + def is_peer_managed(self) -> bool: + """Return whether the peer backend should process this unit's queue.""" + return self.backend == ProcessingBackend.PEER + + def is_etcd_managed(self) -> bool: + """Return whether the etcd backend should process this unit's queue.""" + return self.backend == ProcessingBackend.ETCD + + +class Operation(BaseModel): + """A single queued operation.""" + + model_config = ConfigDict(use_enum_values=True) + + callback_id: str + requested_at: datetime + max_retry: int | None = None + attempt: int = 0 + result: OperationResult | None = None + kwargs: dict[str, Any] = Field(default_factory=dict) + + @field_validator('callback_id') + @classmethod + def validate_callback_id(cls, value: str) -> str: + if not value.strip(): + raise ValueError('callback_id must be a non-empty string') + return value + + @field_validator('kwargs') + @classmethod + def validate_kwargs(cls, value: dict[str, Any]) -> dict[str, Any]: + try: + json.dumps(value) + except TypeError as e: + raise ValueError(f'kwargs must be JSON-serializable: {e}') from e + return value + + @field_serializer('kwargs') + def serialize_kwargs(self, value: dict[str, Any]) -> dict[str, Any]: + """Ensure deterministic ordering of kwargs.""" + return dict(sorted(value.items())) + + @field_validator('max_retry') + @classmethod + def validate_max_retry(cls, value: int | None) -> int | None: + if value is not None and value < 0: + raise ValueError('max_retry must be >= 0') + return value + + @field_validator('attempt') + @classmethod + def validate_attempt(cls, value: int) -> int: + if value < 
0: + raise ValueError('attempt must be >= 0') + return value + + @field_validator('requested_at', mode='before') + @classmethod + def validate_requested_at(cls, value: Any) -> Any: + if isinstance(value, str): + return parse_timestamp(value) + return value + + @field_serializer('requested_at') + def serialize_requested_at(self, value: datetime) -> str: + return datetime_to_str(value) + + @classmethod + def create( + cls, + callback_id: str, + kwargs: dict[str, Any], + max_retry: int | None = None, + ) -> 'Operation': + """Create a new operation from a callback id and kwargs.""" + return cls( + callback_id=callback_id, + kwargs=kwargs, + requested_at=now_timestamp(), + max_retry=max_retry, + attempt=0, + result=None, + ) + + def to_string(self) -> str: + """Serialize to a single JSON object string.""" + return self.model_dump_json() + + @classmethod + def from_string(cls, data: str) -> 'Operation': + """Deserialize from a JSON string.""" + try: + return cls.model_validate_json(data) + except Exception as e: + logger.error('Failed to deserialize Operation from %s: %s', data, e) + raise RollingOpsDecodingError( + 'Failed to deserialize data to create an Operation' + ) from e + + def increase_attempt(self) -> None: + """Increment the attempt counter.""" + self.attempt += 1 + + def is_max_retry_reached(self) -> bool: + """Return True if attempt exceeds max_retry (unless max_retry is None).""" + if self.max_retry is None: + return False + return self.attempt > self.max_retry + + def complete(self) -> None: + """Mark the operation as completed to indicate the lock should be released.""" + self.increase_attempt() + self.result = OperationResult.RELEASE + + def retry_release(self) -> None: + """Mark the operation to be retried later, releasing the lock. + + If the maximum retry count is reached, the operation is marked as + ``RELEASE`` and will not be retried further. 
+ """ + self.increase_attempt() + if self.is_max_retry_reached(): + logger.warning('Operation max retry reached. Dropping.') + self.result = OperationResult.RELEASE + else: + self.result = OperationResult.RETRY_RELEASE + + def retry_hold(self) -> None: + """Mark the operation to be retried immediately, retaining the lock. + + If the maximum retry count is reached, the operation is marked as + ``RELEASE`` and will not be retried further. + """ + self.increase_attempt() + if self.is_max_retry_reached(): + self.result = OperationResult.RELEASE + logger.warning('Operation max retry reached. Dropping.') + else: + self.result = OperationResult.RETRY_HOLD + + @property + def op_id(self) -> str: + """Return the unique identifier for this operation.""" + return f'{datetime_to_str(self.requested_at)}-{self.callback_id}' + + def _kwargs_to_json(self) -> str: + """Deterministic JSON serialization for kwargs.""" + return json.dumps(self.kwargs, sort_keys=True, separators=(',', ':')) + + def __eq__(self, other: object) -> bool: + """Equal for the operation.""" + if not isinstance(other, Operation): + return False + return self.callback_id == other.callback_id and self.kwargs == other.kwargs + + def __hash__(self) -> int: + """Hash for the operation.""" + return hash((self.callback_id, self._kwargs_to_json())) + + +class OperationQueue(RootModel[list[Operation]]): + """In-memory FIFO queue of Operations with encode/decode helpers for storing in a databag.""" + + def __init__(self, operations: list[Operation] | None = None) -> None: + super().__init__(root=operations or []) # pyright: ignore[reportUnknownMemberType] + + @property + def operations(self) -> list[Operation]: + """Return the underlying list of operations.""" + return self.root + + def __len__(self) -> int: + """Return the number of operations in the queue.""" + return len(self.root) + + @property + def empty(self) -> bool: + """Return True if there are no queued operations.""" + return not self.root + + def peek(self) 
-> Operation | None: + """Return the first operation in the queue if it exists.""" + return self.operations[0] if self.operations else None + + def _peek_last(self) -> Operation | None: + """Return the last operation in the queue if it exists.""" + return self.operations[-1] if self.operations else None + + def dequeue(self) -> Operation | None: + """Drop the first operation in the queue if it exists and return it.""" + return self.operations.pop(0) if self.operations else None + + def increase_attempt(self) -> None: + """Increment the attempt counter for the head operation and persist it.""" + if self.empty: + return + self.operations[0].increase_attempt() + + def enqueue(self, operation: Operation) -> None: + """Append operation only if it is not equal to the tail operation.""" + last_operation = self._peek_last() + if last_operation is not None and last_operation == operation: + return + self.operations.append(operation) + + def to_string(self) -> str: + """Encode entire queue to a single JSON string.""" + return self.model_dump_json() + + @classmethod + def from_string(cls, data: str) -> 'OperationQueue': + """Decode a queue from a JSON string. + + Args: + data: Serialized queue as a JSON array of operation objects. + + Returns: + The decoded operation queue. + + Raises: + RollingOpsDecodingError: If the queue cannot be deserialized. + """ + if not data: + return cls([]) + + try: + return cls.model_validate_json(data) + except Exception as e: + logger.error( + 'Failed to deserialize data to create an OperationQueue from %s: %s', + data, + e, + ) + raise RollingOpsDecodingError( + 'Failed to deserialize data to create an OperationQueue.' + ) from e + + +@dataclass(frozen=True) +class RollingOpsState: + """Snapshot of the rolling-ops state for a unit. + + This object provides a view of the rolling-ops system from the perspective + of a single unit. 
+ + This state is intended for decision-making in charm logic + + The `processing_backend` reflects the backend currently selected + for execution. It may change dynamically (e.g. fallback from etcd + to peer). + The `operations` queue always reflects the peer-backed state, which + acts as the source of truth and fallback mechanism. + When `status` is UNAVAILABLE, the unit cannot currently participate + in rolling operations due to missing relations or backend failures. + + Attributes: + status: High-level rolling-ops status for the unit. + processing_backend: Backend currently responsible for executing + operations (e.g. ETCD or PEER). + operations: The unit's operation queue. + """ + + status: RollingOpsStatus + processing_backend: ProcessingBackend + operations: OperationQueue + + +class SyncLockBackend(ABC): + """Interface for synchronous lock backends. + + Implementations provide a mechanism to acquire and release a lock + protecting a critical section. These backends are used by the + RollingOpsManager to coordinate synchronous operations within a + single unit when etcd is not available. + """ + + @abstractmethod + def acquire(self, timeout: int | None) -> None: + """Acquire the lock, blocking until it is granted or timeout expires. + + Args: + timeout: Maximum time in seconds to wait for the lock. + None means wait indefinitely. + + Raises: + TimeoutError: If the lock could not be acquired within the timeout. + """ + raise NotImplementedError + + @abstractmethod + def release(self) -> None: + """Release the lock. + + Implementations must ensure that only the lock owner can release + the lock and that any associated resources are cleaned up. 
+ """ + raise NotImplementedError diff --git a/rollingops/src/charmlibs/rollingops/common/_utils.py b/rollingops/src/charmlibs/rollingops/common/_utils.py new file mode 100644 index 000000000..dfdddb221 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/common/_utils.py @@ -0,0 +1,156 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rolling ops common functions.""" + +import logging +import subprocess +from collections.abc import Callable +from datetime import UTC, datetime +from logging.handlers import RotatingFileHandler +from typing import TypeVar + +from ops import pebble +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed + +from charmlibs.pathops import PebbleConnectionError + +logger = logging.getLogger(__name__) +T = TypeVar('T') + +LOCK_GRANTED_HOOK_NAME = 'rollingops_lock_granted' +ETCD_FAILED_HOOK_NAME = 'rollingops_etcd_failed' + + +@retry( + retry=retry_if_exception_type((PebbleConnectionError, pebble.APIError, pebble.ChangeError)), + stop=stop_after_attempt(3), + wait=wait_fixed(10), + reraise=True, +) +def with_pebble_retry[T](func: Callable[[], T]) -> T: + return func() + + +def now_timestamp() -> datetime: + """UTC timestamp.""" + return datetime.now(UTC) + + +def parse_timestamp(timestamp: str) -> datetime | None: + """Parse epoch timestamp string. 
Return None on errors.""" + try: + return datetime.fromtimestamp(float(timestamp), tz=UTC) + except Exception: + return None + + +def datetime_to_str(dt: datetime) -> str: + return str(dt.timestamp()) + + +def setup_logging( + log_file: str, + *, + unit_name: str, + cluster_id: str | None = None, + owner: str | None = None, +) -> None: + """Configure logging with file rotation. + + This sets up the root logger to write INFO-level (and above) logs + to a rotating file handler. Log files are capped at 10 MB each, + with up to 10 backup files retained. + + This function is used in the context of the background process. + + Args: + log_file: Path to the log file where logs should be written. + unit_name: Juju unit name associated with the background process. + cluster_id: Optional etcd cluster identifier. + owner: Optional worker owner identifier. + """ + handler = RotatingFileHandler( + log_file, + maxBytes=10 * 1024 * 1024, # 10 MB + backupCount=10, + ) + + formatter = logging.Formatter( + '%(asctime)s [%(levelname)s] [%(process)d] ' + '[unit=%(unit_name)s cluster=%(cluster_id)s owner=%(owner)s] ' + '%(name)s: %(message)s' + ) + handler.setFormatter(formatter) + + def add_context(record: logging.LogRecord) -> bool: + record.unit_name = unit_name + record.cluster_id = cluster_id or '-' + record.owner = owner or '-' + return True + + handler.addFilter(add_context) + + root = logging.getLogger() + root.setLevel(logging.INFO) + root.handlers.clear() + root.addHandler(handler) + + +def _dispatch_hook(unit_name: str, charm_dir: str, hook_name: str) -> None: + """Execute a Juju hook on a specific unit via juju-exec. + + This function triggers a charm hook by invoking the charm's `dispatch` + script with the appropriate JUJU_DISPATCH_PATH environment variable. + + Args: + unit_name: The Juju unit name (e.g., "app/0") on which to run the hook. + charm_dir: Filesystem path to the charm directory containing the dispatch script. 
+ hook_name: Name of the hook to dispatch (without the "hooks/" prefix). + + Raises: + subprocess.CalledProcessError: If the juju-exec command fails. + """ + run_cmd = '/usr/bin/juju-exec' + dispatch_sub_cmd = f'JUJU_DISPATCH_PATH=hooks/{hook_name} {charm_dir}/dispatch' + res = subprocess.run([run_cmd, '-u', unit_name, dispatch_sub_cmd], check=False) + res.check_returncode() + logger.info('%s hook dispatched.', hook_name) + + +def dispatch_lock_granted(unit_name: str, charm_dir: str) -> None: + """Dispatch the LOCK_GRANTED_HOOK_NAME hook on a unit. + + Args: + unit_name: The Juju unit name (e.g., "app/0"). + charm_dir: Filesystem path to the charm directory. + + Raises: + subprocess.CalledProcessError: If the hook execution fails. + """ + _dispatch_hook(unit_name, charm_dir, LOCK_GRANTED_HOOK_NAME) + + +def dispatch_etcd_failed(unit_name: str, charm_dir: str) -> None: + """Dispatch the fatal etcd-worker failure hook. + + This notifies the charm that the etcd worker encountered an + unrecoverable error so that higher-level logic can fall back to the + peer backend. + + Args: + unit_name: Name of the unit dispatching the hook. + charm_dir: Path to the charm root directory. + """ + _dispatch_hook(unit_name, charm_dir, ETCD_FAILED_HOOK_NAME) diff --git a/rollingops/src/charmlibs/rollingops/etcd/__init__.py b/rollingops/src/charmlibs/rollingops/etcd/__init__.py new file mode 100644 index 000000000..064b097a3 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/etcd/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rollingops for charms using etcd.""" diff --git a/rollingops/src/charmlibs/rollingops/etcd/_backend.py b/rollingops/src/charmlibs/rollingops/etcd/_backend.py new file mode 100644 index 000000000..ebc616d97 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/etcd/_backend.py @@ -0,0 +1,400 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import time +from typing import Any + +from ops import Object, Relation +from ops.charm import ( + CharmBase, + RelationCreatedEvent, + RelationDepartedEvent, +) + +from charmlibs.rollingops.common._exceptions import ( + RollingOpsInvalidLockRequestError, + RollingOpsNoEtcdRelationError, + RollingOpsSyncLockError, +) +from charmlibs.rollingops.common._models import ( + Operation, + OperationResult, + RollingOpsStatus, + RunWithLockOutcome, + RunWithLockStatus, + UnitBackendState, +) +from charmlibs.rollingops.etcd import _etcdctl as etcdctl +from charmlibs.rollingops.etcd._etcd import EtcdLease, EtcdLock, ManagerOperationStore +from charmlibs.rollingops.etcd._models import RollingOpsKeys +from charmlibs.rollingops.etcd._relations import EtcdRequiresV1, SharedClientCertificateManager +from charmlibs.rollingops.etcd._worker import EtcdRollingOpsAsyncWorker + +logger = logging.getLogger(__name__) + + +class EtcdRollingOpsBackend(Object): + """Manage rolling operations using 
etcd-backed coordination. + + This backend stores operation state in etcd, coordinates asynchronous + execution through an etcd-backed distributed lock, and exposes a + synchronous lock interface for critical sections. + + Each unit manages its own etcd worker process and operation queues. + Operations are scoped using a cluster identifier and a per-unit owner. + """ + + def __init__( + self, + charm: CharmBase, + peer_relation_name: str, + etcd_relation_name: str, + cluster_id: str, + callback_targets: dict[str, Any], + ): + """Initialize the etcd-backed rolling-ops backend. + + Args: + charm: The charm instance owning this backend. + peer_relation_name: Name of the peer relation used for shared + state and worker coordination. + etcd_relation_name: Name of the relation providing etcd access. + cluster_id: Identifier used to scope etcd keys for this rolling-ops + instance. + callback_targets: Mapping from callback identifiers to callables + executed when an operation is granted the asynchronous lock. 
+ """ + super().__init__(charm, 'etcd-rolling-ops-manager') + self._charm = charm + self.peer_relation_name = peer_relation_name + self.etcd_relation_name = etcd_relation_name + self.callback_targets = callback_targets + + owner = f'{self.model.uuid}-{self.model.unit.name}'.replace('/', '-') + self.worker = EtcdRollingOpsAsyncWorker( + charm, peer_relation_name=peer_relation_name, owner=owner, cluster_id=cluster_id + ) + self.keys = RollingOpsKeys.for_owner(cluster_id=cluster_id, owner=owner) + + self.shared_certificates = SharedClientCertificateManager( + charm, + peer_relation_name=peer_relation_name, + ) + + self.etcd = EtcdRequiresV1( + charm, + relation_name=etcd_relation_name, + cluster_id=self.keys.cluster_prefix, + shared_certificates=self.shared_certificates, + ) + self._async_lock = EtcdLock(lock_key=self.keys.lock_key, owner=owner) + self._sync_lock = EtcdLock(lock_key=self.keys.lock_key, owner=f'{owner}:sync') + self._lease: EtcdLease | None = None + self.operations_store = ManagerOperationStore(self.keys, owner) + + self.framework.observe( + charm.on[self.peer_relation_name].relation_departed, self._on_peer_relation_departed + ) + self.framework.observe( + charm.on[self.etcd_relation_name].relation_created, self._on_etcd_relation_created + ) + + @property + def _peer_relation(self) -> Relation | None: + """Return the peer relation for this backend.""" + return self.model.get_relation(self.peer_relation_name) + + @property + def _etcd_relation(self) -> Relation | None: + """Return the etcd relation for this backend.""" + return self.model.get_relation(self.etcd_relation_name) + + def is_available(self) -> bool: + """Return whether the etcd backend is currently usable. + + The backend is considered available only if the etcd relation exists + and the etcd client has been initialized successfully. + + Returns: + True if etcd can currently be used, otherwise False. 
+ """ + if self._etcd_relation is None: + return False + try: + etcdctl.ensure_initialized() + except Exception: + return False + return True + + def enqueue_operation(self, operation: Operation) -> None: + """Persist an operation in etcd for this unit. + + Before storing the operation, this method clears any pending fallback + state for the current unit. If the unit had previously fallen back + from etcd to peer processing and cleanup is still required, stale etcd + operation state is removed first so processing can resume from a clean + slate. + + Args: + operation: The operation to enqueue. + + Raises: + RollingOpsNoEtcdRelationError: If the etcd relation does not exist. + RollingOpsEtcdNotConfiguredError: If the etcd client has not been + configured yet. + PebbleConnectionError: If the remote container cannot be reached. + """ + if self._etcd_relation is None: + raise RollingOpsNoEtcdRelationError + + etcdctl.ensure_initialized() + + backend_state = UnitBackendState(self.model, self.peer_relation_name, self.model.unit) + if backend_state.cleanup_needed: + self.operations_store.clean_up() + backend_state.clear_fallback() + + self.operations_store.request(operation) + + def ensure_processing(self): + """Ensure that the etcd worker process is running. + + The worker is responsible for acquiring the asynchronous lock and + processing queued operations for this unit. + """ + self.worker.start() + + def is_processing(self) -> bool: + """Return whether the etcd worker process is currently running.""" + return self.worker.is_running() + + def _on_etcd_relation_created(self, event: RelationCreatedEvent) -> None: + """Validate that the etcdctl command is available when etcd is related. + + Args: + event: The relation-created event for the etcd relation. 
+ """ + if not etcdctl.is_etcdctl_installed(): + logger.error('%s is not installed.', etcdctl.ETCDCTL_CMD) + + def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None: + """Handle removal of a unit from the peer relation. + + If the current unit is departing, the etcd worker process is stopped + to ensure a clean shutdown and avoid leaving a stale worker running. + + Args: + event: The peer relation departed event. + """ + unit = event.departing_unit + if unit == self.model.unit: + self.worker.stop() + + def request_async_lock( + self, + callback_id: str, + kwargs: dict[str, Any] | None = None, + max_retry: int | None = None, + ) -> None: + """Queue a rolling operation and trigger asynchronous lock acquisition. + + This method creates a new operation representing a callback to execute + once the distributed lock is granted. The operation is appended to the + unit's pending operation queue stored in etcd. + + If the operation is successfully enqueued, the background worker process + responsible for acquiring the distributed lock and processing operations + is started. + + Args: + callback_id: Identifier of the registered callback to execute when + the lock is granted. + kwargs: Optional keyword arguments passed to the callback when + executed. Must be JSON-serializable. + max_retry: Maximum number of retries for the operation. + - None: retry indefinitely + - 0: do not retry on failure + + Raises: + RollingOpsInvalidLockRequestError: If the callback_id is not registered or + invalid parameters were provided. + RollingOpsNoEtcdRelationError: if the etcd relation does not exist + RollingOpsEtcdNotConfiguredError: if etcd client has not been configured yet + PebbleConnectionError: if the remote container cannot be reached. 
+ """ + if callback_id not in self.callback_targets: + raise RollingOpsInvalidLockRequestError(f'Unknown callback_id: {callback_id}') + + if not self._etcd_relation: + raise RollingOpsNoEtcdRelationError + + etcdctl.ensure_initialized() + + if kwargs is None: + kwargs = {} + + operation = Operation.create(callback_id, kwargs, max_retry) + self.operations_store.request(operation) + self.worker.start() + + def _on_run_with_lock(self) -> RunWithLockOutcome: + """Execute the current operation while holding the distributed lock. + + This method is triggered when the worker determines that the current + unit owns the distributed lock. The method retrieves the head operation + from the in-progress queue and executes its registered callback. + + After execution, the operation is moved to the completed queue and its + updated state is persisted. + + Returns: + A structured outcome describing whether an operation was executed + and, if so, which operation was finalized and with what result. + + Raises: + RollingOpsEtcdTransactionError: if the operation cannot be marked + as completed. + """ + if not self._async_lock.is_held(): + logger.info('Lock is not granted. Operation will not run.') + return RunWithLockOutcome(status=RunWithLockStatus.NOT_GRANTED) + + if not (operation := self.operations_store.peek_current()): + logger.info('Lock granted but there is no operation to run.') + return RunWithLockOutcome(status=RunWithLockStatus.NO_OPERATION) + + if not (callback := self.callback_targets.get(operation.callback_id)): + logger.error( + 'Operation %s target was not found. 
Releasing operation without retry.', + operation.callback_id, + ) + self.operations_store.finalize(operation, OperationResult.RELEASE) + return RunWithLockOutcome( + status=RunWithLockStatus.MISSING_CALLBACK, + op_id=operation.op_id, + result=OperationResult.RELEASE, + ) + logger.info( + 'Executing callback_id=%s, attempt=%s', operation.callback_id, operation.attempt + ) + + try: + result = callback(**operation.kwargs) + except Exception as e: + logger.exception('Operation failed: %s: %s', operation.callback_id, e) + result = OperationResult.RETRY_RELEASE + + match result: + case OperationResult.RETRY_HOLD: + logger.info( + 'Finished %s. Operation will be retried immediately.', operation.callback_id + ) + case OperationResult.RETRY_RELEASE: + logger.info('Finished %s. Operation will be retried later.', operation.callback_id) + case _: + logger.info('Finished %s. Lock will be released.', operation.callback_id) + result = OperationResult.RELEASE + + try: + self.operations_store.finalize(operation, result) + except Exception: + logger.exception('Failed to commit operation %s to etcd.', operation.callback_id) + return RunWithLockOutcome( + status=RunWithLockStatus.EXECUTED_NOT_COMMITTED, + op_id=operation.op_id, + result=result, + ) + return RunWithLockOutcome( + status=RunWithLockStatus.EXECUTED, + op_id=operation.op_id, + result=result, + ) + + def acquire_sync_lock(self, timeout: int | None) -> None: + """Acquire the etcd-backed synchronous lock for this unit. + + A dedicated lease is granted and kept alive for the duration of the + lock. The backend then repeatedly attempts to acquire the sync lock + until it succeeds or the timeout expires. + + Args: + timeout: Maximum time in seconds to wait for the lock. + None means wait indefinitely. + + Raises: + TimeoutError: If the lock could not be acquired before the timeout. + RollingOpsSyncLockError: if there was an error obtaining the lock. 
+ """ + self._lease = EtcdLease() + + deadline = None if timeout is None else time.monotonic() + timeout + + try: + self._lease.grant() + + if self._lease.id is None: + raise RollingOpsSyncLockError('Failed to grant an etcd lease.') + while True: + try: + if self._sync_lock.try_acquire(self._lease.id): + logger.info('etcd lock acquired.') + return + except Exception: + logger.exception('Failed while trying to acquire etcd sync lock.') + raise + + if deadline is not None and time.monotonic() >= deadline: + raise TimeoutError(f'Timed out acquiring etcd sync lock after {timeout}s.') + + time.sleep(15) + + except Exception as e: + try: + self._lease.revoke() + except Exception: + logger.exception('Failed to revoke lease %s.', self._lease.id) + raise RollingOpsSyncLockError('Failed to acquire the etcd sync lock') from e + + def release_sync_lock(self) -> None: + """Release the synchronous lock and revoke its lease.""" + self._sync_lock.release() + if self._lease is not None: + self._lease.revoke() + + def get_status(self) -> RollingOpsStatus: + """Return the rolling-ops status for this unit in etcd mode. + + Status is derived from the current etcd-backed lock state and the + unit's queued operation state. + + Returned values: + - UNAVAILABLE: etcd backend is not available + - GRANTED: the async lock is currently held by this unit + - WAITING: this unit has queued work but does not hold the lock + - IDLE: this unit has no pending work + + Returns: + The current rolling-ops status for this unit. 
+ """ + if self._peer_relation is None or self._etcd_relation is None or not self.is_available(): + return RollingOpsStatus.UNAVAILABLE + + if self._async_lock.is_held(): + return RollingOpsStatus.GRANTED + + if self.operations_store.has_pending_work(): + return RollingOpsStatus.WAITING + + return RollingOpsStatus.IDLE diff --git a/rollingops/src/charmlibs/rollingops/_certificates.py b/rollingops/src/charmlibs/rollingops/etcd/_certificates.py similarity index 89% rename from rollingops/src/charmlibs/rollingops/_certificates.py rename to rollingops/src/charmlibs/rollingops/etcd/_certificates.py index 34f4b9e71..3c53bd939 100644 --- a/rollingops/src/charmlibs/rollingops/_certificates.py +++ b/rollingops/src/charmlibs/rollingops/etcd/_certificates.py @@ -19,11 +19,13 @@ with etcd via TLS. Certificates are generated only once and persisted under a local directory so they can be reused across charm executions. -Certificates are valid for 20 years. They are not renewed or rotated. +Certificates are valid for 50 years. They are not renewed or rotated. 
""" from datetime import timedelta +import shortuuid + from charmlibs import pathops from charmlibs.interfaces.tls_certificates import ( Certificate, @@ -32,11 +34,9 @@ PrivateKey, TLSCertificatesError, ) -from charmlibs.rollingops._models import ( - RollingOpsFileSystemError, - SharedCertificate, - with_pebble_retry, -) +from charmlibs.rollingops.common._exceptions import RollingOpsFileSystemError +from charmlibs.rollingops.common._utils import with_pebble_retry +from charmlibs.rollingops.etcd._models import SharedCertificate BASE_DIR = pathops.LocalPath('/var/lib/rollingops/tls') CA_CERT_PATH = BASE_DIR / 'client-ca.pem' @@ -90,7 +90,7 @@ def _has_client_cert_key_and_ca(shared: SharedCertificate) -> bool: raise RollingOpsFileSystemError('Failed to read certificates and key.') from e -def generate(common_name: str) -> SharedCertificate: +def generate(model_uuid: str, app_name: str) -> SharedCertificate: """Generate a client CA and client certificate if they do not exist. This method creates: @@ -103,8 +103,8 @@ def generate(common_name: str) -> SharedCertificate: If the certificates already exist, this method does nothing. Args: - common_name: Common Name (CN) used in the client certificate - subject. This value should not contain slashes. + model_uuid: string used to build the common name. + app_name: string used to build the common name. 
Raises: PebbleConnectionError: if the remote container cannot be reached @@ -117,6 +117,9 @@ def generate(common_name: str) -> SharedCertificate: CA_CERT_PATH, ) + # Produce a unique <=64-character string + raw = f'{model_uuid}-{app_name}' + common_name = shortuuid.uuid(name=raw) ca_key = PrivateKey.generate(key_size=KEY_SIZE) ca_attributes = CertificateRequestAttributes( common_name=common_name, diff --git a/rollingops/src/charmlibs/rollingops/etcd/_etcd.py b/rollingops/src/charmlibs/rollingops/etcd/_etcd.py new file mode 100644 index 000000000..5ce1aef7f --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/etcd/_etcd.py @@ -0,0 +1,546 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Classes that manage etcd concepts.""" + +import logging +import os +import subprocess +import time + +import charmlibs.rollingops.etcd._etcdctl as etcdctl +from charmlibs.rollingops.common._exceptions import ( + RollingOpsEtcdctlFatalError, + RollingOpsEtcdctlParseError, + RollingOpsEtcdTransactionError, +) +from charmlibs.rollingops.common._models import Operation, OperationResult +from charmlibs.rollingops.etcd._models import RollingOpsKeys + +logger = logging.getLogger(__name__) + +LOCK_LEASE_TTL = '60' + + +class EtcdLease: + """Manage the lifecycle of an etcd lease and its keep-alive process.""" + + def __init__(self): + self.id: str | None = None + self.keepalive_proc: subprocess.Popen[str] | None = None + self._pipe_write_fd: int | None = None + + def grant(self) -> None: + """Create a new lease and start the keep-alive process.""" + res = etcdctl.run('lease', 'grant', LOCK_LEASE_TTL) + # parse: "lease 694d9c9aeca3422a granted with TTL(60s)" + parts = res.split() + try: + lease_id = parts[1] + int(lease_id, 16) + except (IndexError, ValueError) as e: + raise RollingOpsEtcdctlParseError(f'Invalid lease output: {res}') from e + + self.id = parts[1] + logger.info('%s', res) + self._start_lease_keepalive() + + def revoke(self) -> None: + """Revoke the current lease and stop the keep-alive process.""" + lease_id = self.id + try: + if self.id is not None: + etcdctl.run('lease', 'revoke', self.id) + except Exception: + logger.exception('Fail to revoke lease %s.', lease_id) + raise + finally: + try: + self._stop_keepalive() + except Exception: + logger.exception('Fail to stop keepalive for lease %s.', lease_id) + finally: + self.id = None + + def _start_lease_keepalive(self) -> None: + """Start the background process that keeps the lease alive.""" + lease_id = self.id + if lease_id is None: + logger.info('Lease ID is None. 
Keepalive for this lease cannot be started.') + return + etcdctl.ensure_initialized() + + pipe_read_fd, pipe_write_fd = os.pipe() + self._pipe_write_fd = pipe_write_fd + + keep_alive_cmd = f'{etcdctl.ETCDCTL_CMD} lease keep-alive {lease_id} /dev/null; wait' # noqa: E501 + try: + self.keepalive_proc = subprocess.Popen( + ['bash', '-c', keep_alive_cmd], + # The pipe read side becomes the child's stdin + # so when the parent closes its write side, this stdin gets EOF + stdin=pipe_read_fd, + env=etcdctl.load_env(), + text=True, + close_fds=True, + preexec_fn=self._close_write_side_in_child, + ) + except Exception: # OSError perhaps? + os.close(pipe_read_fd) + os.close(pipe_write_fd) + self._pipe_write_fd = None + raise + + os.close(pipe_read_fd) + logger.info('Keepalive started for lease %s.', self.id) + + def _close_write_side_in_child(self) -> None: + if self._pipe_write_fd is None: + return + os.close(self._pipe_write_fd) + + def _stop_keepalive(self) -> None: + """Terminate the keep-alive subprocess if it is running.""" + # Close the write side of the pipe to set EOF to the child's stdin + # and trigger the `read -r _` + if self._pipe_write_fd is not None: + try: + os.close(self._pipe_write_fd) + except OSError: + pass + finally: + self._pipe_write_fd = None + + if self.keepalive_proc is None: + return + + # Additional safeguard + try: + self.keepalive_proc.terminate() + except ProcessLookupError: + # Already dead + return + except Exception: + try: + self.keepalive_proc.wait(timeout=2) + except subprocess.TimeoutExpired: + logger.exception('Fail to stop keepalive for lease %s.') + self.keepalive_proc.kill() + return + finally: + self.keepalive_proc = None + + +class EtcdLock: + """Distributed lock implementation backed by etcd. + + The lock is represented by a key whose value identifies the current owner. + + Lock acquisition and release are performed using transactions to + ensure atomicity. 
+ + The lock is attached to an etcd lease so that it is + automatically released if the owner stops refreshing the lease. + """ + + def __init__(self, lock_key: str, owner: str): + self.lock_key = lock_key + self.owner = owner + + def try_acquire(self, lease_id: str) -> bool: + """Attempt to acquire the lock. + + This method uses an etcd transaction that succeeds only if the + lock key does not yet exist. If successful, the lock key is created with the current + owner as its value and is attached to the provided lease. + + Args: + lease_id: ID of the etcd lease to associate with the lock. + + Returns: + True if the lock was successfully acquired, otherwise False. + """ + if not self.lock_key or not self.owner or not lease_id: + raise RollingOpsEtcdctlFatalError('Invalid input for lock acquire transaction.') + + txn = f"""\ + version("{self.lock_key}") = "0" + + put "{self.lock_key}" "{self.owner}" --lease={lease_id} + + + """ + return etcdctl.txn(txn) + + def release(self) -> None: + """Release the lock if it is currently held by this owner. + + The lock is removed only if the value of the lock key matches + the current owner. This prevents one process from accidentally + releasing a lock held by another owner. + """ + if not self.lock_key or not self.owner: + raise RollingOpsEtcdctlFatalError('Invalid input for lock release transaction.') + + txn = f"""\ + value("{self.lock_key}") = "{self.owner}" + + del "{self.lock_key}" + + + """ + etcdctl.txn(txn) + + def is_held(self) -> bool: + """Check whether the lock is currently held by the owner.""" + if not self.lock_key or not self.owner: + raise RollingOpsEtcdctlFatalError('Invalid input for check lock ownership operation.') + res = etcdctl.run('get', self.lock_key, '--print-value-only') + return res == self.owner + + +class EtcdOperationQueue: + """Queue abstraction for operations stored in etcd. + + This class represents a queue of operations stored under a common + key prefix in etcd. 
Each operation is stored as a key-value pair + where the key encodes the operation identifier and ordering, and + the value contains the serialized operation data. + """ + + def __init__(self, prefix: str, lock_key: str, owner: str): + self.prefix = prefix + self.lock_key = lock_key + self.owner = owner + + def peek(self) -> Operation | None: + """Return the first operation in the queue without removing it.""" + kv = etcdctl.get_first_key_value_pair(self.prefix) + if kv is None: + return None + return Operation.model_validate(kv.value) + + def _peek_last(self) -> Operation | None: + """Return the last operation in the queue without removing it.""" + kv = etcdctl.get_last_key_value_pair(self.prefix) + if kv is None: + return None + return Operation.model_validate(kv.value) + + def move_head(self, to_queue_prefix: str) -> bool: + """Move the first operation in the queue to another queue. + + This operation is performed atomically using an etcd transaction. + The transaction succeeds only if: + - The lock is currently held by the configured owner. + - The head operation still exists. + + Args: + to_queue_prefix: Destination queue prefix. + + Returns: + True if the operation was moved successfully, otherwise False. + """ + kv = etcdctl.get_first_key_value_pair(self.prefix) + if kv is None: + return False + + op_id = kv.key.split('/')[-1] + new_key = f'{to_queue_prefix}{op_id}' + op = Operation.model_validate(kv.value) + data = op.to_string() + + txn = f"""\ + value("{self.lock_key}") = "{self.owner}" + version("{kv.key}") != "0" + + put "{new_key}" {data} + del "{kv.key}" + + + """ + return etcdctl.txn(txn) + + def move_operation(self, to_queue_prefix: str, operation: Operation) -> bool: + """Move a specific operation from this queue to another queue. + + The operation is identified using its operation ID and moved + atomically via an etcd transaction. + + Args: + to_queue_prefix: Destination queue prefix. + operation: Operation to move. 
+ + Returns: + True if the operation was successfully moved, otherwise False. + """ + old_key = f'{self.prefix}{operation.op_id}' + new_key = f'{to_queue_prefix}{operation.op_id}' + + data = operation.to_string() + + txn = f"""\ + value("{self.lock_key}") = "{self.owner}" + version("{old_key}") != "0" + + put "{new_key}" {data} + del "{old_key}" + + + """ + return etcdctl.txn(txn) + + def watch(self) -> Operation: + """Block until at least one operation exists and return it.""" + while True: + kv = etcdctl.get_first_key_value_pair(self.prefix) + if kv is not None: + return Operation.model_validate(kv.value) + time.sleep(10) + + def dequeue(self) -> bool: + """Remove the first operation from the queue. + + The removal is performed using an etcd transaction that ensures + the lock owner still holds the lock and the operation exists. + + Returns: + True if the operation was removed successfully, otherwise False. + """ + kv = etcdctl.get_first_key_value_pair(self.prefix) + if kv is None: + return False + + txn = f"""\ + value("{self.lock_key}") = "{self.owner}" + version("{kv.key}") != "0" + + del "{kv.key}" + + + """ + return etcdctl.txn(txn) + + def enqueue(self, operation: Operation) -> None: + """Insert a new operation into the queue. + + The method avoids inserting duplicate operations by comparing + the new operation with the last operation currently in the queue. + + Args: + operation: Operation to insert. + """ + old_operation = self._peek_last() + + if old_operation is not None and operation == old_operation: + logger.info( + 'Operation %s not added to the etcd queue. 
' + 'It already exists in the back of the queue.', + operation.callback_id, + ) + return + + op_str = operation.to_string() + key = f'{self.prefix}{operation.op_id}' + etcdctl.run('put', key, cmd_input=op_str) + logger.info('Operation %s added to the etcd queue.', operation.callback_id) + + def clear(self) -> None: + etcdctl.run('del', self.prefix, '--prefix') + + +class WorkerOperationStore: + """Background-worker view of etcd-backed rolling operations. + + This class is used by the background process that coordinates lock + ownership and operation execution. It manages the lifecycle of queued + operations across the etcd-backed queue prefixes: + + - pending: operations waiting to be claimed + - in-progress: operations currently being executed + - completed: operations that finished execution and await post-processing + + It provides worker-oriented methods to: + - detect pending work + - claim the next operation for execution + - wait for completed operations + - requeue or delete completed operations + """ + + def __init__(self, keys: RollingOpsKeys, owner: str): + self._pending = EtcdOperationQueue(keys.pending, keys.lock_key, owner) + self._inprogress = EtcdOperationQueue(keys.inprogress, keys.lock_key, owner) + self._completed = EtcdOperationQueue(keys.completed, keys.lock_key, owner) + + def has_pending(self) -> bool: + """Check whether there are pending operations. + + Returns: + True if at least one operation exists in the pending queue, + otherwise False. + """ + return self._pending.peek() is not None + + def has_inprogress(self) -> bool: + """Check whether there are in-progress operations. + + Returns: + True if at least one operation exists in the inprogress queue, + otherwise False. + """ + return self._inprogress.peek() is not None + + def has_completed(self) -> bool: + """Check whether there are completed operations. + + Returns: + True if at least one operation exists in the completed queue, + otherwise False. 
+ """ + return self._completed.peek() is not None + + def claim_next(self) -> str: + """Move the next pending operation to the in-progress queue. + + This operation is performed atomically and only succeeds if: + - the lock is still held by this owner + - the head of the pending queue has not changed + + Returns: + The operation ID of the operation + + Raises: + RollingOpsEtcdTransactionError: if the transaction failed. + """ + if not self._pending.move_head(self._inprogress.prefix): + raise RollingOpsEtcdTransactionError('Failed to move operation to in progress.') + + operation = self._inprogress.peek() + if operation is None: + raise RollingOpsEtcdTransactionError('Failed to get the ID of the next operation.') + return operation.op_id + + def wait_until_completed(self) -> Operation: + """Block until at least one operation appears in the completed queue.""" + return self._completed.watch() + + def requeue_completed(self) -> None: + """Requeue the head completed operation back to the pending queue. + + This is typically used when an operation needs to be retried + (e.g., RETRY_RELEASE or RETRY_HOLD semantics). + + Raises: + RollingOpsEtcdTransactionError: if the transaction failed. + """ + if not self._completed.move_head(self._pending.prefix): + raise RollingOpsEtcdTransactionError('Failed to move operation to pending.') + + def delete_completed(self) -> None: + """Remove the head operation from the completed queue. + + This is typically used when an operation has finished successfully + and does not need to be retried. + + Raises: + RollingOpsEtcdTransactionError: if the transaction failed. + """ + if not self._completed.dequeue(): + raise RollingOpsEtcdTransactionError('Failed finalize operation.') + + +class ManagerOperationStore: + """Charm-facing interface for requesting and finalizing etcd-backed operations. + + This class is used by the RollingOps manager running inside the charm. 
+ It provides a narrow interface for interacting with the etcd-backed + operation queues without exposing the full queue topology. + + The manager can use it to: + - request a new operation + - inspect the current in-progress operation + - finalize an operation after execution + + Queue transitions and storage details remain encapsulated behind this API. + """ + + def __init__(self, keys: RollingOpsKeys, owner: str): + self._pending = EtcdOperationQueue(keys.pending, keys.lock_key, owner) + self._inprogress = EtcdOperationQueue(keys.inprogress, keys.lock_key, owner) + self._completed = EtcdOperationQueue(keys.completed, keys.lock_key, owner) + + def request(self, operation: Operation) -> None: + """Add a new operation to the pending queue. + + Duplicate operations (same callback_id and kwargs as the last queued + operation) are not inserted. + + Args: + operation: Operation to enqueue. + """ + self._pending.enqueue(operation) + + def finalize(self, operation: Operation, result: OperationResult) -> None: + """Move an in-progress operation to the completed queue. + + This should be called after the operation has been executed and its + result has been recorded. + + Args: + operation: The operation currently in the in-progress queue. + result: Result of the executions. + + Raises: + RollingOpsEtcdTransactionError: if the operation cannot be marked + as completed. + """ + match result: + case OperationResult.RETRY_HOLD: + operation.retry_hold() + case OperationResult.RETRY_RELEASE: + operation.retry_release() + case _: + operation.complete() + + if not self._inprogress.move_operation(self._completed.prefix, operation): + raise RollingOpsEtcdTransactionError('Failed to set the operation as completed.') + + def peek_current(self) -> Operation | None: + """Return the current in-progress operation without modifying state. + + Returns: + The current in-progress operation, or None if no operation is + being processed. 
+ """ + return self._inprogress.peek() + + def has_pending_work(self) -> bool: + """Return whether there is an operation currently being processed. + + Returns: + True if there is a current operation, otherwise False. + """ + return self.peek_current() is not None + + def clean_up(self) -> None: + """Clear all operation queues for this unit. + + This removes all in-progress, pending, and completed operations, + resetting the local etcd-backed state. It is typically used when + recovering from inconsistencies or after switching backends to + ensure a clean starting point. + """ + self._inprogress.clear() + self._pending.clear() + self._completed.clear() diff --git a/rollingops/src/charmlibs/rollingops/_etcdctl.py b/rollingops/src/charmlibs/rollingops/etcd/_etcdctl.py similarity index 51% rename from rollingops/src/charmlibs/rollingops/_etcdctl.py rename to rollingops/src/charmlibs/rollingops/etcd/_etcdctl.py index 4befb143a..e5eb4f1c3 100644 --- a/rollingops/src/charmlibs/rollingops/_etcdctl.py +++ b/rollingops/src/charmlibs/rollingops/etcd/_etcdctl.py @@ -27,14 +27,24 @@ from dataclasses import asdict from functools import lru_cache +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_fixed, +) + from charmlibs import pathops -from charmlibs.rollingops._models import ( - CERT_MODE, - EtcdConfig, +from charmlibs.rollingops.common._exceptions import ( + RollingOpsEtcdctlFatalError, + RollingOpsEtcdctlParseError, + RollingOpsEtcdctlRetryableError, RollingOpsEtcdNotConfiguredError, RollingOpsFileSystemError, - with_pebble_retry, ) +from charmlibs.rollingops.common._utils import with_pebble_retry +from charmlibs.rollingops.etcd._models import CERT_MODE, EtcdConfig, EtcdKV logger = logging.getLogger(__name__) @@ -42,6 +52,9 @@ SERVER_CA_PATH = BASE_DIR / 'server-ca.pem' CONFIG_FILE_PATH = BASE_DIR / 'etcdctl.json' ETCDCTL_CMD = 'etcdctl' +ETCDCTL_TIMEOUT_SECONDS = 15 +ETCDCTL_RETRY_ATTEMPTS = 12 
+ETCDCTL_RETRY_WAIT_SECONDS = 5 @lru_cache(maxsize=1) @@ -185,31 +198,210 @@ def cleanup() -> None: raise RollingOpsFileSystemError('Failed to remove etcd config file and CA.') from e -def run(*args: str) -> str | None: +def _is_retryable_stderr(stderr: str) -> bool: + """Return whether stderr looks like a transient etcd/client failure.""" + text = stderr.lower() + retryable_markers = ( + 'connection refused', + 'context deadline exceeded', + 'deadline exceeded', + 'temporarily unavailable', + 'transport is closing', + 'connection reset', + 'broken pipe', + 'unavailable', + 'leader changed', + 'etcdserver: request timed out', + ) + return any(marker in text for marker in retryable_markers) + + +@retry( + retry=retry_if_exception_type(RollingOpsEtcdctlRetryableError), + stop=stop_after_attempt(ETCDCTL_RETRY_ATTEMPTS), + wait=wait_fixed(ETCDCTL_RETRY_WAIT_SECONDS), + before_sleep=before_sleep_log(logger, logging.WARNING), + reraise=True, +) +def _run_checked(*args: str, cmd_input: str | None = None) -> subprocess.CompletedProcess[str]: + """Execute etcdctl and return the completed process. + + Raises: + RollingOpsEtcdNotConfiguredError: if etcdctl is not configured. + PebbleConnectionError: if the remote container cannot be reached. + RollingOpsEtcdctlRetryableError: for transient command failures. + RollingOpsEtcdctlFatalError: for non-retryable command failures. 
+ """ + ensure_initialized() + + cmd = [ETCDCTL_CMD, *args] + + try: + res = subprocess.run( + cmd, + env=load_env(), + input=cmd_input, + text=True, + capture_output=True, + check=False, + timeout=ETCDCTL_TIMEOUT_SECONDS, + ) + except subprocess.TimeoutExpired as e: + logger.warning( + 'Timed out running etcdctl: cmd=%r stdout=%r stderr=%r', cmd, e.stdout, e.stderr + ) + raise RollingOpsEtcdctlRetryableError(f'Timed out running etcdctl: {cmd!r}') from e + except FileNotFoundError as e: + logger.exception('etcdctl executable not found: %s', ETCDCTL_CMD) + raise RollingOpsEtcdctlFatalError(f'etcdctl executable not found: {ETCDCTL_CMD}') from e + except OSError as e: + logger.exception('Failed to execute etcdctl: cmd=%r', cmd) + raise RollingOpsEtcdctlFatalError(f'Failed to execute etcdctl: {cmd!r}') from e + + if res.returncode != 0: + logger.warning( + 'etcdctl command failed: cmd=%r returncode=%s stdout=%r stderr=%r', + cmd, + res.returncode, + res.stdout, + res.stderr, + ) + if _is_retryable_stderr(res.stderr): + raise RollingOpsEtcdctlRetryableError( + f'Retryable etcdctl failure (rc={res.returncode}): {res.stderr.strip()}' + ) + raise RollingOpsEtcdctlFatalError( + f'etcdctl failed (rc={res.returncode}): {res.stderr.strip()}' + ) + + logger.debug('etcdctl command succeeded: cmd=%r stdout=%r', cmd, res.stdout) + return res + + +def run(*args: str, cmd_input: str | None = None) -> str: """Execute an etcdctl command. Args: args: List of arguments to pass to etcdctl. + cmd_input: value to use as input when running the command. Returns: The stdout of the command, stripped, or None if execution failed. Raises: - RollingOpsEtcdNotConfiguredError: if the etcd config file does not exist. + RollingOpsEtcdNotConfiguredError: if etcdctl is not configured. + RollingOpsFileSystemError: if configuration cannot be read. PebbleConnectionError: if the remote container cannot be reached. + RollingOpsEtcdctlError: etcdctl command error. 
""" - ensure_initialized() - cmd = [ETCDCTL_CMD, *args] + return _run_checked(*args, cmd_input=cmd_input).stdout.strip() - try: - result = subprocess.run( - cmd, env=load_env(), check=True, text=True, capture_output=True - ).stdout.strip() - except subprocess.CalledProcessError as e: - logger.error('etcdctl command failed: returncode: %s, error: %s', e.returncode, e.stderr) - return None - except subprocess.TimeoutExpired as e: - logger.error('Timed out running etcdctl: %s', e.stderr) + +def _get_key_value_pair(key_prefix: str, *extra_args: str) -> EtcdKV | None: + """Retrieve the first key and value under a given prefix. + + Args: + key_prefix: Key prefix to search for. + extra_args: Arguments to the get command + + Returns: + A EtcdKV containing: + - The key string + - The parsed JSON value as a dictionary + + Returns None if no key exists. + + Raises: + RollingOpsEtcdctlParseError: if the output is malformed + + """ + res = run('get', key_prefix, '--prefix', *extra_args) + out = res.splitlines() + if len(out) < 2: return None - return result + try: + value = json.loads(out[1]) + except json.JSONDecodeError as e: + raise RollingOpsEtcdctlParseError( + f'Failed to parse JSON value for key {out[0]}: {out[1]}' + ) from e + + return EtcdKV(key=out[0], value=value) + + +def get_first_key_value_pair(key_prefix: str) -> EtcdKV | None: + """Retrieve the first key and value under a given prefix. + + Args: + key_prefix: Key prefix to search for. + + Returns: + A tuple containing: + - The key string + - The parsed JSON value as a dictionary + + Returns None if no key exists or the command fails. + + Raises: + RollingOpsEtcdctlParseError: if the output is malformed + """ + return _get_key_value_pair(key_prefix, '--limit=1') + + +def get_last_key_value_pair(key_prefix: str) -> EtcdKV | None: + """Retrieve the last key and value under a given prefix. + + Args: + key_prefix: Key prefix to search for. 
+ + Returns: + A tuple containing: + - The key string + - The parsed JSON value as a dictionary + + Returns None if no key exists or the command fails. + + Raises: + RollingOpsEtcdctlParseError: if the output is malformed + """ + return _get_key_value_pair( + key_prefix, + '--sort-by=KEY', + '--order=DESCEND', + '--limit=1', + ) + + +def txn(txn_input: str) -> bool: + """Execute an etcd transaction. + + The transaction string should follow the etcdctl transaction format + where comparison statements are followed by operations. + + Args: + txn_input: The transaction specification passed to `etcdctl txn`. + + Returns: + True if the transaction succeeded, otherwise False. + + Raises: + RollingOpsEtcdNotConfiguredError: if etcdctl is not configured. + PebbleConnectionError: if the remote container cannot be reached. + RollingOpsEtcdctlError: etcdctl command error. + RollingOpsEtcdctlParseError: if invalid response is found + """ + res = _run_checked('txn', cmd_input=txn_input) + + lines = res.stdout.splitlines() + if not lines: + raise RollingOpsEtcdctlParseError('Empty txn response') + + first_line = lines[0].strip() + + if first_line == 'SUCCESS': + return True + if first_line == 'FAILURE': + return False + + raise RollingOpsEtcdctlParseError(f'Unexpected txn response: {res.stdout}') diff --git a/rollingops/src/charmlibs/rollingops/_models.py b/rollingops/src/charmlibs/rollingops/etcd/_models.py similarity index 81% rename from rollingops/src/charmlibs/rollingops/_models.py rename to rollingops/src/charmlibs/rollingops/etcd/_models.py index 5e653bc2e..fa1daaa08 100644 --- a/rollingops/src/charmlibs/rollingops/_models.py +++ b/rollingops/src/charmlibs/rollingops/etcd/_models.py @@ -14,74 +14,17 @@ """etcd rolling ops models.""" -from collections.abc import Callable from dataclasses import dataclass -from enum import StrEnum -from typing import ClassVar, TypeVar - -from ops import pebble -from tenacity import retry, retry_if_exception_type, stop_after_attempt, 
wait_fixed +from typing import ClassVar from charmlibs.interfaces.tls_certificates import Certificate, PrivateKey -from charmlibs.pathops import LocalPath, PebbleConnectionError - -T = TypeVar('T') - - -class RollingOpsNoEtcdRelationError(Exception): - """Raised if we are trying to process a lock, but do not appear to have a relation yet.""" - - -class RollingOpsEtcdUnreachableError(Exception): - """Raised if etcd server is unreachable.""" - - -class RollingOpsEtcdNotConfiguredError(Exception): - """Raised if etcd client has not been configured yet (env file does not exist).""" - - -class RollingOpsFileSystemError(Exception): - """Raised if there is a problem when interacting with the filesystem.""" - - -class RollingOpsInvalidLockRequestError(Exception): - """Raised if the lock request is invalid.""" - - -class RollingOpsDecodingError(Exception): - """Raised if json content cannot be processed.""" - - -class RollingOpsInvalidSecretContentError(Exception): - """Raised if the content of a secret is invalid.""" - - -class RollingOpsCharmLibMissingError(Exception): - """Raised if the path to the libraries cannot be resolved.""" - +from charmlibs.pathops import LocalPath +from charmlibs.rollingops.common._utils import with_pebble_retry CERT_MODE = 0o644 KEY_MODE = 0o600 -@retry( - retry=retry_if_exception_type((PebbleConnectionError, pebble.APIError, pebble.ChangeError)), - stop=stop_after_attempt(3), - wait=wait_fixed(10), - reraise=True, -) -def with_pebble_retry[T](func: Callable[[], T]) -> T: - return func() - - -class OperationResult(StrEnum): - """Callback return values.""" - - RELEASE = 'release' - RETRY_RELEASE = 'retry-release' - RETRY_HOLD = 'retry-hold' - - @dataclass(frozen=True) class SharedCertificate: """Represent the certificates shared within units of an app to connect to etcd.""" @@ -207,6 +150,14 @@ class EtcdConfig: key_path: str +@dataclass +class EtcdKV: + """A single etcd key-value entry.""" + + key: str + value: dict[str, str] + + 
@dataclass(frozen=True) class RollingOpsKeys: """Collection of etcd key prefixes used for rolling operations. diff --git a/rollingops/src/charmlibs/rollingops/_relations.py b/rollingops/src/charmlibs/rollingops/etcd/_relations.py similarity index 96% rename from rollingops/src/charmlibs/rollingops/_relations.py rename to rollingops/src/charmlibs/rollingops/etcd/_relations.py index 7189f1ef1..8d37f92d8 100644 --- a/rollingops/src/charmlibs/rollingops/_relations.py +++ b/rollingops/src/charmlibs/rollingops/etcd/_relations.py @@ -32,9 +32,10 @@ from ops.framework import Object from charmlibs.interfaces.tls_certificates import Certificate, TLSCertificatesError -from charmlibs.rollingops import _certificates as certificates -from charmlibs.rollingops import _etcdctl as etcdctl -from charmlibs.rollingops._models import RollingOpsInvalidSecretContentError, SharedCertificate +from charmlibs.rollingops.common._exceptions import RollingOpsInvalidSecretContentError +from charmlibs.rollingops.etcd import _certificates as certificates +from charmlibs.rollingops.etcd import _etcdctl as etcdctl +from charmlibs.rollingops.etcd._models import SharedCertificate logger = logging.getLogger(__name__) CERT_SECRET_FIELD = 'rollingops-client-secret-id' # noqa: S105 @@ -111,8 +112,7 @@ def create_and_share_certificate(self) -> None: ) return - common_name = f'rollingops-{self.model.uuid}-{self.model.app.name}' - shared = certificates.generate(common_name) + shared = certificates.generate(self.model.uuid, self.model.app.name) secret = self.model.app.add_secret( content={ diff --git a/rollingops/src/charmlibs/rollingops/etcd/_rollingops.py b/rollingops/src/charmlibs/rollingops/etcd/_rollingops.py new file mode 100644 index 000000000..9d45b183a --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/etcd/_rollingops.py @@ -0,0 +1,181 @@ +# Copyright 2026 Canonical Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +import time + +from charmlibs.rollingops.common._models import OperationResult +from charmlibs.rollingops.common._utils import ( + ETCD_FAILED_HOOK_NAME, + dispatch_etcd_failed, + dispatch_lock_granted, + setup_logging, +) +from charmlibs.rollingops.etcd._etcd import ( + EtcdLease, + EtcdLock, + WorkerOperationStore, +) +from charmlibs.rollingops.etcd._models import RollingOpsKeys +from charmlibs.rollingops.etcd._worker import ETCD_LOG_FILENAME + +logger = logging.getLogger(__name__) + +INITIAL_SLEEP = 10 # Delay before the worker begins processing. +LOCK_ACQUIRE_SLEEP = 15 # Delay between etcd lock acquisition attempts. +NEXT_OP_SLEEP = 30 # Delay between queue polls when idle. + + +class RollingOpsEtcdInconsistencyError(Exception): + """Raised when unexpected or inconsistent etcd operation state is found.""" + + +def main(): + """Run the etcd rolling-ops worker loop. + + This worker is responsible for processing the current unit's + etcd-backed operation queue. It waits for pending work, acquires the + etcd lock, claims the next operation, dispatches the lock-granted + hook, and then waits for the operation result to be written back. 
+ + Processing behavior depends on the final operation result: + + - `RETRY_HOLD`: requeue the operation immediately and keep the lock + - `RETRY_RELEASE`: requeue the operation and release the lock + - any other result: remove the completed operation and release the lock + + If the worker detects invalid etcd queue state or encounters an + unrecoverable error, it dispatches the ETCD_FAILED_HOOK_NAME + hook so the charm can fall back to peer-based processing. + + The worker always attempts to revoke its lease and release the lock + before exiting. + """ + parser = argparse.ArgumentParser(description='RollingOps etcd worker') + parser.add_argument( + '--unit-name', + type=str, + required=True, + help='Juju unit name (e.g. app/0)', + ) + parser.add_argument( + '--charm-dir', + type=str, + required=True, + help='Path to the charm directory', + ) + + parser.add_argument( + '--owner', + type=str, + required=True, + help='Unique owner identifier for the unit', + ) + parser.add_argument( + '--cluster-id', + type=str, + required=True, + help='Cluster identifier', + ) + args = parser.parse_args() + + setup_logging( + ETCD_LOG_FILENAME, unit_name=args.unit_name, owner=args.owner, cluster_id=args.cluster_id + ) + logger.info('Starting worker.') + + time.sleep(INITIAL_SLEEP) + + keys = RollingOpsKeys.for_owner(args.cluster_id, args.owner) + lock = EtcdLock(keys.lock_key, args.owner) + lease = EtcdLease() + operations = WorkerOperationStore(keys, args.owner) + + try: + while True: + if operations.has_inprogress() or operations.has_completed(): + raise RollingOpsEtcdInconsistencyError('Invalid operations found in etcd queues.') + + if not operations.has_pending(): + time.sleep(NEXT_OP_SLEEP) + continue + + logger.info('Operation found in the pending queue.') + + if not lock.is_held(): + if lease.id is None: + lease.grant() + + if lease.id is None: + raise RollingOpsEtcdInconsistencyError('Invalid lease ID found.') + + logger.info('Try to get lock using lease %s.', lease.id) + 
while not lock.try_acquire(lease.id): + time.sleep(LOCK_ACQUIRE_SLEEP) + continue + logger.info('Lock granted using lease %s.', lease.id) + + op_id = operations.claim_next() + + dispatch_lock_granted(args.unit_name, args.charm_dir) + + logger.info('Waiting for operation %s to be finished.', op_id) + operation = operations.wait_until_completed() + + logger.info('Operation %s completed with %s', operation.op_id, operation.result) + match operation.result: + case OperationResult.RETRY_HOLD: + operations.requeue_completed() + continue + + case OperationResult.RETRY_RELEASE: + operations.requeue_completed() + + case _: + operations.delete_completed() + + lease_id = lease.id + lease.revoke() + lock.release() + logger.info('Lease %s revoked and lock released.', lease_id) + time.sleep(NEXT_OP_SLEEP) + + except Exception as e: + logger.exception('Fatal etcd worker error: %s', e) + + try: + dispatch_etcd_failed(args.unit_name, args.charm_dir) + except Exception: + logger.exception('Failed to dispatch %s hook.', ETCD_FAILED_HOOK_NAME) + + finally: + lease_id = lease.id + try: + lease.revoke() + logger.info('Lease %s revoked.', lease_id) + except Exception: + logger.exception('Failed to revoke lease %s during worker shutdown.', lease_id) + + try: + lock.release() + logger.info('Lock released.') + except Exception: + logger.exception('Failed to release lock during worker shutdown.') + + logger.info('Exit.') + + +if __name__ == '__main__': + main() diff --git a/rollingops/src/charmlibs/rollingops/etcd/_worker.py b/rollingops/src/charmlibs/rollingops/etcd/_worker.py new file mode 100644 index 000000000..8240b4378 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/etcd/_worker.py @@ -0,0 +1,124 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""etcd rolling ops. Spawns and manages the external rolling-ops worker process.""" + +import logging + +from ops.charm import CharmBase + +from charmlibs import pathops +from charmlibs.rollingops.common._base_worker import BaseRollingOpsAsyncWorker + +logger = logging.getLogger(__name__) + +ETCD_LOG_FILENAME = '/var/log/etcd_rollingops_worker.log' + + +class EtcdRollingOpsAsyncWorker(BaseRollingOpsAsyncWorker): + """Manage the etcd-backed rolling-ops worker process. + + Unlike the peer backend, each unit runs its own worker process when + using the etcd backend. Worker PID is stored in the unit databag, + ensuring isolation between units and allowing each unit to independently + manage its own worker lifecycle. + """ + + _pid_field = 'etcd-rollingops-worker-pid' + _log_filename = ETCD_LOG_FILENAME + + def __init__(self, charm: CharmBase, peer_relation_name: str, owner: str, cluster_id: str): + super().__init__(charm, 'etcd-rollingops-async-worker', peer_relation_name) + self._owner = owner + self._cluster_id = cluster_id + + def _worker_script_path(self) -> pathops.LocalPath: + """Return the path to the etcd rolling-ops worker script. + + This script is executed in a background process to handle operation + processing for the etcd backend. + """ + return pathops.LocalPath( + self._venv_site_packages() / 'charmlibs' / 'rollingops' / 'etcd' / '_rollingops.py' + ) + + def _worker_args(self) -> list[str]: + """Return the arguments passed to the etcd worker process. + + Returns: + A list of command-line arguments for the worker process. 
+ """ + return [ + '--owner', + self._owner, + '--cluster-id', + self._cluster_id, + ] + + @property + def _pid(self) -> int | None: + """Return the stored worker process PID for this unit. + + The PID is stored in the unit databag because each unit runs its own + independent worker process when using the etcd backend. This ensures + that worker lifecycle management is isolated per unit. + + Returns: + The worker process PID, or None if not set. + """ + if self._relation is None: + return None + pid = self._relation.data[self.model.unit].get(self._pid_field, '') + + try: + pid = int(pid) + except (ValueError, TypeError): + logger.info('Missing PID or invalid PID found in etcd worker state.') + pid = None + + return pid + + @_pid.setter + def _pid(self, value: int | None) -> None: + """Persist the worker process PID in the unit databag. + + The PID is stored per unit to reflect that each unit owns and manages + its own worker process when using the etcd backend. + + Args: + value: The process identifier to store. + """ + if self._relation is None: + return + self._relation.data[self.model.unit].update({ + self._pid_field: '' if value is None else str(value) + }) + + def _on_existing_worker(self, pid: int) -> bool: + """Executed on detection of an already running worker for this unit. + + Since each unit manages its own worker process, an existing worker is + considered valid and is left running. No restart is performed. + + Args: + pid: The PID of the currently running worker. + + Returns: + False to indicate that no new worker should be started. + """ + logger.info( + 'RollingOps worker already running with PID %s; not starting a new one.', + pid, + ) + return False diff --git a/rollingops/src/charmlibs/rollingops/peer/__init__.py b/rollingops/src/charmlibs/rollingops/peer/__init__.py new file mode 100644 index 000000000..c75a6c654 --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/peer/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2026 Canonical Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rollingops for charms using peer relations.""" diff --git a/rollingops/src/charmlibs/rollingops/_peer_manager.py b/rollingops/src/charmlibs/rollingops/peer/_backend.py similarity index 57% rename from rollingops/src/charmlibs/rollingops/_peer_manager.py rename to rollingops/src/charmlibs/rollingops/peer/_backend.py index e75bdcb78..310285c95 100644 --- a/rollingops/src/charmlibs/rollingops/_peer_manager.py +++ b/rollingops/src/charmlibs/rollingops/peer/_backend.py @@ -86,19 +86,19 @@ interface: rolling_op ``` -Import this library into src/charm.py, and initialize a PeerRollingOpsManager in the Charm's +Import this library into src/charm.py, and initialize a PeerRollingOpsBackend in the Charm's `__init__`. 
The Charm should also define a callback routine, which will be executed when a unit holds the distributed lock: src/charm.py ```python -from charms.rolling_ops.v1.rollingops import PeerRollingOpsManager, OperationResult +from charms.rolling_ops.v1.rollingops import PeerRollingOpsBackend, OperationResult class SomeCharm(CharmBase): def __init__(self, *args): super().__init__(*args) - self.rolling_ops = PeerRollingOpsManager( + self.rolling_ops = PeerRollingOpsBackend( charm=self, relation_name="restart", callback_targets={ @@ -152,47 +152,66 @@ def _on_restart_action(self, event) -> None: from collections.abc import Callable from typing import Any -from ops import Relation +from ops import Object, Relation, Unit from ops.charm import ( CharmBase, RelationChangedEvent, RelationDepartedEvent, ) -from ops.framework import EventBase, Object +from ops.framework import EventBase -from charmlibs.rollingops._peer_models import ( - Lock, - LockIterator, - OperationResult, +from charmlibs.rollingops.common._exceptions import ( RollingOpsDecodingError, RollingOpsInvalidLockRequestError, RollingOpsNoRelationError, +) +from charmlibs.rollingops.common._models import ( + Operation, + OperationResult, + RollingOpsStatus, + RunWithLockOutcome, + RunWithLockStatus, +) +from charmlibs.rollingops.peer._models import ( + PeerAppLock, + PeerUnitOperations, + iter_peer_units, pick_oldest_completed, pick_oldest_request, ) -from charmlibs.rollingops._peer_worker import PeerRollingOpsAsyncWorker +from charmlibs.rollingops.peer._worker import PeerRollingOpsAsyncWorker logger = logging.getLogger(__name__) -class PeerRollingOpsManager(Object): - """Emitters and handlers for rolling ops.""" +class PeerRollingOpsBackend(Object): + """Manage rolling operations using the peer-relation backend. + + This backend stores operation queues in the peer relation and relies + on the leader unit to schedule lock grants across units. 
Once a unit + is granted the lock, it executes its queued operation locally. + + The peer backend acts as both the primary backend when etcd is not + available and as the durable fallback state used to continue + processing when etcd-backed execution fails. + """ def __init__( self, charm: CharmBase, relation_name: str, callback_targets: dict[str, Callable[..., Any]] ): - """Register our custom events. + """Initialize the peer-backed rolling-ops backend. - params: - charm: the charm we are attaching this to. - relation_name: the peer relation name from metadata.yaml. - callback_targets: mapping from callback_id -> callable. + Args: + charm: The charm instance owning this backend. + relation_name: Name of the peer relation used to store lock and + operation state. + callback_targets: Mapping from callback identifiers to callables + executed when this unit is granted the lock. """ super().__init__(charm, 'peer-rolling-ops-manager') self._charm = charm self.relation_name = relation_name self.callback_targets = callback_targets - self.charm_dir = charm.charm_dir self.worker = PeerRollingOpsAsyncWorker(charm, relation_name=relation_name) self.framework.observe( @@ -202,13 +221,64 @@ def __init__( charm.on[self.relation_name].relation_departed, self._on_relation_departed ) self.framework.observe(charm.on.leader_elected, self._process_locks) - self.framework.observe(charm.on.update_status, self._on_rollingops_lock_granted) @property def _relation(self) -> Relation | None: - """Returns the peer relation used to manage locks.""" + """Return the peer relation used for lock and operation state.""" return self.model.get_relation(self.relation_name) + def _lock(self) -> PeerAppLock: + """Return the shared application-level peer lock. + + This lock is stored in the peer relation application databag and is + used by the leader to grant execution rights to one unit at a time. 
+ """ + return PeerAppLock(self.model, self.relation_name) + + def _operations(self, unit: Unit) -> PeerUnitOperations: + """Return the peer-backed operation queue for a unit. + + Args: + unit: The unit whose operation queue should be accessed. + + Returns: + A helper for reading and updating that unit's queued operations. + """ + return PeerUnitOperations(self.model, self.relation_name, unit) + + def enqueue_operation(self, operation: Operation) -> None: + """Persist an operation in the current unit's peer-backed queue. + + Args: + operation: The operation to enqueue. + + Raises: + RollingOpsInvalidLockRequestError: If the operation could not be + persisted due to invalid or undecodable queue state. + RollingOpsNoRelationError: If the peer relation is not available. + """ + try: + self._operations(self.model.unit).request(operation) + except (RollingOpsDecodingError, ValueError) as e: + logger.error('Failed to create operation: %s', e) + raise RollingOpsInvalidLockRequestError('Failed to create the lock request') from e + except RollingOpsNoRelationError as e: + logger.debug('No %s peer relation yet.', self.relation_name) + raise e + + def ensure_processing(self) -> None: + """Trigger peer-based scheduling if the current unit is leader. + + In the peer backend, scheduling decisions are made only by the + leader unit. Non-leader units do not actively process locks. + """ + if self.model.unit.is_leader(): + self._process_locks() + + def has_pending_work(self) -> bool: + """Return whether the current unit has pending peer-managed work.""" + return self._operations(self.model.unit).has_pending_work() + def _on_rollingops_lock_granted(self, event: EventBase) -> None: """Handler of the custom hook rollingops_lock_granted. 
@@ -216,11 +286,11 @@ def _on_rollingops_lock_granted(self, event: EventBase) -> None: """ if not self._relation: return - logger.info('Received a rolling-ops lock granted event.') - lock = Lock(self.model, self.relation_name, self.model.unit) - if lock.should_run(): + lock = self._lock() + operations = self._operations(self.model.unit) + if operations.should_run(lock): self._on_run_with_lock() - self._process_locks() + self._process_locks() def _on_relation_departed(self, event: RelationDepartedEvent) -> None: """Leader cleanup: if a departing unit was granted a lock, clear the grant. @@ -230,19 +300,25 @@ def _on_relation_departed(self, event: RelationDepartedEvent) -> None: if not self.model.unit.is_leader(): return if unit := event.departing_unit: - lock = Lock(self.model, self.relation_name, unit) - if lock.is_granted(): + lock = self._lock() + if lock.is_granted(unit.name): lock.release() - self._process_locks() + self._process_locks() def _on_relation_changed(self, _: RelationChangedEvent) -> None: - """Process relation changed.""" + """React to peer relation changes. + + The leader re-runs scheduling whenever peer relation state changes. + Non-leader units only check whether they should execute an operation + that has already been granted to them. 
+ """ if self.model.unit.is_leader(): self._process_locks() return - lock = Lock(self.model, self.relation_name, self.model.unit) - if lock.should_run(): + lock = self._lock() + operations = self._operations(self.model.unit) + if operations.should_run(lock): self._on_run_with_lock() def _valid_peer_unit_names(self) -> set[str]: @@ -258,7 +334,9 @@ def _release_stale_grant(self) -> None: if not self._relation: return - if not (granted_unit := self._relation.data[self.model.app].get('granted_unit', '')): + lock = self._lock() + granted_unit = lock.granted_unit + if not granted_unit: return valid_units = self._valid_peer_unit_names() @@ -267,7 +345,7 @@ def _release_stale_grant(self) -> None: 'granted_unit=%s is not in current peer units; releasing stale grant.', granted_unit, ) - self._relation.data[self.model.app].update({'granted_unit': '', 'granted_at': ''}) + lock.release() def _process_locks(self, _: EventBase | None = None) -> None: """Process locks. @@ -278,21 +356,28 @@ def _process_locks(self, _: EventBase | None = None) -> None: if not self.model.unit.is_leader(): return - for lock in LockIterator(self.model, self.relation_name): - if lock.should_release(): + lock = self._lock() + + for unit in iter_peer_units(self.model, self.relation_name): + operations = self._operations(unit) + if not operations.is_peer_managed(): + continue + if operations.should_release(lock): lock.release() break self._release_stale_grant() - granted_unit = self._relation.data[self.model.app].get('granted_unit', '') # type: ignore[reportOptionalMemberAccess] - if granted_unit: - logger.info('Current granted_unit=%s. No new unit will be scheduled.', granted_unit) + if lock.granted_unit: + logger.info( + 'Current granted_unit=%s. No new unit will be scheduled.', + lock.granted_unit, + ) return - self._schedule() + self._schedule(lock) - def _schedule(self) -> None: + def _schedule(self, lock: PeerAppLock) -> None: """Select and grant the next lock based on priority and queue state. 
This method iterates over all locks associated with the relation and @@ -309,17 +394,23 @@ def _schedule(self) -> None: """ logger.info('Starting scheduling.') - pending_requests: list[Lock] = [] - pending_retries: list[Lock] = [] + pending_requests: list[PeerUnitOperations] = [] + pending_retries: list[PeerUnitOperations] = [] - for lock in LockIterator(self.model, self.relation_name): - if lock.is_retry_hold(): - self._grant_lock(lock) + for unit in iter_peer_units(self.model, self.relation_name): + operations = self._operations(unit) + + if not operations.is_peer_managed(): + continue + + if operations.is_retry_hold(): + self._grant_lock(lock, operations.unit.name) return - if lock.is_waiting(): - pending_requests.append(lock) - elif lock.is_waiting_retry(): - pending_retries.append(lock) + + if operations.is_waiting(): + pending_requests.append(operations) + elif operations.is_waiting_retry(): + pending_retries.append(operations) selected = None if pending_requests: @@ -327,30 +418,28 @@ def _schedule(self) -> None: elif pending_retries: selected = pick_oldest_completed(pending_retries) - if not selected: + if selected is None: logger.info('No pending lock requests. Lock was not granted to any unit.') return - self._grant_lock(selected) + self._grant_lock(lock, selected) - def _grant_lock(self, selected: Lock) -> None: + def _grant_lock(self, lock: PeerAppLock, unit_name: str) -> None: """Grant the lock to the selected unit. - If the lock is granted to the leader unit: - - If it is a retry, starts the worker to break the loop before next execution. - - Otherwise, the callback is run immediately + Once the lock is granted, the selected unit becomes eligible to + execute its next queued operation. If the selected unit is the local + unit (leader), its worker process is started to trigger execution. Args: - selected: The lock instance to grant. + lock: The peer lock instance to grant. + unit_name: Name of the unit receiving the lock grant. 
""" - selected.grant() - logger.info('Lock granted to unit=%s.', selected.unit.name) - if selected.unit == self.model.unit: - if selected.is_retry(): - self.worker.start() - return - self._on_run_with_lock() - self._process_locks() + lock.grant(unit_name) + logger.info('Lock granted to unit=%s.', unit_name) + + if unit_name == self.model.unit.name: + self.worker.start() def request_async_lock( self, @@ -386,11 +475,12 @@ def request_async_lock( try: if kwargs is None: kwargs = {} - lock = Lock(self.model, self.relation_name, self.model.unit) - lock.request(callback_id, kwargs, max_retry) + operation = Operation.create(callback_id, kwargs, max_retry) + operations = self._operations(self.model.unit) + operations.request(operation) except (RollingOpsDecodingError, ValueError) as e: - logger.error('Failed operation: %s', e) + logger.error('Failed to create operation: %s', e) raise RollingOpsInvalidLockRequestError('Failed to create the lock request') from e except RollingOpsNoRelationError as e: logger.debug('No %s peer relation yet.', self.relation_name) @@ -407,22 +497,24 @@ def _on_run_with_lock(self) -> None: - Otherwise, the operation's callback is looked up by `callback_id` and invoked with the operation kwargs. """ - lock = Lock(self.model, self.relation_name, self.model.unit) + lock = self._lock() + operations = self._operations(self.model.unit) - if not lock.is_granted(): + if not lock.is_granted(self.model.unit.name): logger.debug('Lock is not granted. Operation will not run.') return - if not (operation := lock.get_current_operation()): + if not (operation := operations.get_current()): logger.debug('There is no operation to run.') - lock.complete() + operations.finish(OperationResult.RELEASE) return if not (callback := self.callback_targets.get(operation.callback_id)): - logger.warning( - 'Operation %s target was not found. It cannot be executed.', + logger.error( + 'Operation %s target was not found. 
Releasing operation without retry.', + operation.callback_id, + ) + operations.finish(OperationResult.RELEASE) + return + logger.info( + 'Executing callback_id=%s, attempt=%s', operation.callback_id, operation.attempt @@ -433,17 +525,70 @@ + except Exception as e: + logger.exception('Operation failed: %s: %s', operation.callback_id, e) + result = OperationResult.RETRY_RELEASE - match result: - case OperationResult.RETRY_HOLD: - logger.info( - 'Finished %s. Operation will be retried immediately.', operation.callback_id - ) - lock.retry_hold() + logger.info('Operation %s executed with result %s.', operation.callback_id, result) + operations.finish(result) + + def mirror_outcome(self, outcome: RunWithLockOutcome) -> None: + """Apply the execution result to the mirrored peer queue. - case OperationResult.RETRY_RELEASE: - logger.info('Finished %s. Operation will be retried later.', operation.callback_id) - lock.retry_release() + This keeps the peer standby queue aligned with the backend that + actually executed the operation. + + Args: + outcome: The etcd execution outcome to mirror. + Raises: + RollingOpsDecodingError: If there is an inconsistency found. + """ + match outcome.status: + case RunWithLockStatus.NOT_GRANTED: + logger.info('Skipping mirror: etcd lock was not granted.') + return + + case RunWithLockStatus.NO_OPERATION: + if not self._operations(self.model.unit).has_pending_work(): + logger.info('Skipping mirror: no operation.') + return + raise RollingOpsDecodingError( + 'Mismatch between the etcd and peer operation queue.' + ) + + case ( + RunWithLockStatus.MISSING_CALLBACK + | RunWithLockStatus.EXECUTED + | RunWithLockStatus.EXECUTED_NOT_COMMITTED + ): + self._operations(self.model.unit).mirror_result(outcome.op_id, outcome.result) # type: ignore[reportArgumentType] case _: - logger.info('Finished %s. 
Lock will be released.', operation.callback_id) - lock.complete() + raise RollingOpsDecodingError( + f'Unsupported run-with-lock outcome: {outcome.status}' + ) + + def get_status(self) -> RollingOpsStatus: + """Return the current rolling-ops status for this unit in peer mode. + + Status is derived from the local unit's peer-backed operation queue + and from the shared peer lock state. + + Returned values: + - UNAVAILABLE: the peer relation does not exist + - GRANTED: the current unit holds the peer lock + - WAITING: the current unit has queued work but does not hold the lock + - IDLE: the current unit has no pending work + + Returns: + The current rolling-ops status for this unit. + """ + if self._relation is None: + return RollingOpsStatus.UNAVAILABLE + + lock = self._lock() + operations = self._operations(self.model.unit) + + if lock.is_granted(self.model.unit.name): + return RollingOpsStatus.GRANTED + + if operations.has_pending_work(): + return RollingOpsStatus.WAITING + + return RollingOpsStatus.IDLE diff --git a/rollingops/src/charmlibs/rollingops/peer/_models.py b/rollingops/src/charmlibs/rollingops/peer/_models.py new file mode 100644 index 000000000..d11a3334d --- /dev/null +++ b/rollingops/src/charmlibs/rollingops/peer/_models.py @@ -0,0 +1,386 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Models for peer-relation rollingops."""

import logging
from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime
from enum import StrEnum

from ops import Model, Unit

from charmlibs.rollingops.common._exceptions import (
    RollingOpsDecodingError,
    RollingOpsNoRelationError,
)
from charmlibs.rollingops.common._models import (
    Operation,
    OperationQueue,
    OperationResult,
    UnitBackendState,
)
from charmlibs.rollingops.common._utils import datetime_to_str, now_timestamp, parse_timestamp

logger = logging.getLogger(__name__)


class LockIntent(StrEnum):
    """Unit-level lock intents stored in unit databags."""

    # Unit has queued work and is waiting for the app-level grant.
    REQUEST = 'request'
    # Last attempt failed; the unit released the lock and waits for a new grant.
    RETRY_RELEASE = 'retry-release'
    # Last attempt failed; the unit keeps the grant and retries with priority.
    RETRY_HOLD = 'retry-hold'
    # No pending work.
    IDLE = 'idle'


@dataclass
class PeerAppData:
    """Application-scoped peer relation data.

    Fields are plain strings because relation databags are
    string-to-string mappings; the `*_dt` properties convert.
    """

    granted_unit: str = ''  # unit name currently holding the lock ('' = none)
    granted_at: str = ''  # serialized grant timestamp ('' = none)

    @property
    def granted_at_dt(self) -> datetime | None:
        """Return the grant timestamp as a datetime, if present."""
        return parse_timestamp(self.granted_at)

    @granted_at_dt.setter
    def granted_at_dt(self, value: datetime | None) -> None:
        """Store the grant timestamp from a datetime."""
        self.granted_at = datetime_to_str(value) if value is not None else ''


@dataclass
class PeerUnitData:
    """Unit-scoped peer relation data.

    Fields are plain strings because relation databags are
    string-to-string mappings; the properties convert.
    """

    state: str = ''  # serialized LockIntent ('' is read back as IDLE)
    operations: str = ''  # serialized OperationQueue
    executed_at: str = ''  # serialized timestamp of the last execution ('' = never)

    @property
    def intent(self) -> LockIntent:
        """Return the unit state as a LockIntent."""
        return LockIntent(self.state) if self.state else LockIntent.IDLE

    @intent.setter
    def intent(self, value: LockIntent) -> None:
        """Store the unit state from a LockIntent."""
        # LockIntent is a StrEnum, so assigning it directly keeps `state` str-valued.
        self.state = value

    @property
    def queue(self) -> OperationQueue:
        """Return the stored operation queue."""
        return OperationQueue.from_string(self.operations)

    @queue.setter
    def queue(self, value: OperationQueue) -> None:
        """Store the operation queue."""
        self.operations = value.to_string()

    @property
    def executed_at_dt(self) -> datetime | None:
        """Return the execution timestamp as a datetime, if present."""
        return parse_timestamp(self.executed_at)

    @executed_at_dt.setter
    def executed_at_dt(self, value: datetime | None) -> None:
        """Store the execution timestamp from a datetime."""
        self.executed_at = datetime_to_str(value) if value is not None else ''


class PeerAppLock:
    """Application-scoped distributed lock state.

    Backed by the application databag of the peer relation.
    """

    def __init__(self, model: Model, relation_name: str):
        """Load the application databag of `relation_name`.

        Raises:
            RollingOpsNoRelationError: if the relation does not exist (yet).
        """
        relation = model.get_relation(relation_name)
        if relation is None:
            raise RollingOpsNoRelationError()

        self._relation = relation
        self._app = model.app
        # Databag values are already strings; the identity decoder keeps them as-is.
        self._app_data = self._relation.load(PeerAppData, self._app, decoder=lambda s: s)

    def _save(self, data: PeerAppData) -> None:
        # Persist back to the app databag; encoder=str mirrors the identity decoder.
        self._relation.save(data, self._app, encoder=str)

    @property
    def granted_unit(self) -> str:
        """Return the unit name currently holding the grant, if any."""
        return self._app_data.granted_unit

    @property
    def granted_at(self) -> datetime | None:
        """Return the timestamp when the grant was issued, if any."""
        return self._app_data.granted_at_dt

    def grant(self, unit_name: str) -> None:
        """Grant the lock to the provided unit."""
        self._app_data.granted_unit = unit_name
        self._app_data.granted_at_dt = now_timestamp()
        self._save(self._app_data)

    def release(self) -> None:
        """Clear the current grant."""
        self._app_data.granted_unit = ''
        self._app_data.granted_at_dt = None
        self._save(self._app_data)

    def is_granted(self, unit_name: str) -> bool:
        """Return whether the provided unit currently holds the grant."""
        return self.granted_unit == unit_name


class PeerUnitOperations:
    """Unit-scoped queued operations and execution state."""

    def __init__(self, model: Model, relation_name: str, unit: Unit):
        """Load `unit`'s databag of `relation_name`.

        Raises:
            RollingOpsNoRelationError: if the relation does not exist (yet).
        """
        relation = model.get_relation(relation_name)
        if relation is None:
            raise RollingOpsNoRelationError()

        self._relation = relation
        self.unit = unit
        self._backend_state = UnitBackendState(model, relation_name, unit)
        self._unit_data = self._relation.load(PeerUnitData, self.unit, decoder=lambda s: s)

    def _save(self, data: PeerUnitData) -> None:
        # Persist back to this unit's databag; encoder=str mirrors the identity decoder.
        self._relation.save(data, self.unit, encoder=str)

    def is_peer_managed(self) -> bool:
        """Return whether the peer backend should process this unit's queue."""
        return self._backend_state.is_peer_managed()

    @property
    def intent(self) -> LockIntent:
        """Return the current unit intent."""
        return self._unit_data.intent

    @property
    def executed_at(self) -> datetime | None:
        """Return the last execution timestamp for this unit."""
        return self._unit_data.executed_at_dt

    @property
    def queue(self) -> OperationQueue:
        """Return this unit's stored operation queue."""
        return self._unit_data.queue

    def get_current(self) -> Operation | None:
        """Return the head operation, if any."""
        return self._unit_data.queue.peek()

    def has_pending_work(self) -> bool:
        """Return whether this unit still has queued work."""
        return self.get_current() is not None

    def request(self, operation: Operation) -> None:
        """Enqueue an operation and mark this unit as requesting the lock."""
        data = self._unit_data
        queue = data.queue

        # The queue de-duplicates; detect a no-op enqueue via the length delta.
        previous_length = len(queue)
        queue.enqueue(operation)
        added = len(queue) != previous_length
        if not added:
            logger.info(
                'Operation %s not added to the peer queue. '
                'It already exists in the back of the queue.',
                operation.callback_id,
            )
            return

        data.queue = queue
        # Only flip the intent when this is the first queued item; otherwise the
        # unit is already requesting (or retrying) and its intent must be preserved.
        if len(queue) == 1:
            data.intent = LockIntent.REQUEST
        self._unit_data = data
        self._save(data)
        logger.info('Operation %s added to the peer queue.', operation.callback_id)

    def finish(self, result: OperationResult) -> None:
        """Persist the result of executing the current operation."""
        self._apply_result_to_data(self._unit_data, result)
        self._save(self._unit_data)

    def _apply_result_to_data(
        self,
        data: PeerUnitData,
        result: OperationResult,
    ) -> None:
        """Mutate `data` in place to reflect `result`; callers persist it."""
        queue = data.queue
        operation = queue.peek()

        if operation is None:
            # Nothing queued: record the execution time and go idle.
            data.intent = LockIntent.IDLE
            data.executed_at_dt = now_timestamp()
            return

        # NOTE(review): the RETRY_HOLD and RETRY_RELEASE arms are identical except
        # for the non-exhausted intent; candidate for a shared private helper.
        match result:
            case OperationResult.RETRY_HOLD:
                queue.increase_attempt()
                operation = queue.peek()
                if operation is None or operation.is_max_retry_reached():
                    logger.warning('Operation max retry reached. Dropping.')
                    queue.dequeue()
                    data.intent = LockIntent.REQUEST if queue.peek() else LockIntent.IDLE
                else:
                    data.intent = LockIntent.RETRY_HOLD

            case OperationResult.RETRY_RELEASE:
                queue.increase_attempt()
                operation = queue.peek()
                if operation is None or operation.is_max_retry_reached():
                    logger.warning('Operation max retry reached. Dropping.')
                    queue.dequeue()
                    data.intent = LockIntent.REQUEST if queue.peek() else LockIntent.IDLE
                else:
                    data.intent = LockIntent.RETRY_RELEASE
            case _:
                # Any other result is terminal: drop the head operation and
                # keep requesting if more work is queued.
                queue.dequeue()
                data.intent = LockIntent.REQUEST if queue.peek() else LockIntent.IDLE

        data.queue = queue
        data.executed_at_dt = now_timestamp()

    def should_run(self, lock: PeerAppLock) -> bool:
        """Return whether this unit should execute now."""
        return (
            self.is_peer_managed()
            and lock.is_granted(self.unit.name)
            and not self._executed_after_grant(lock)
        )

    def should_release(self, lock: PeerAppLock) -> bool:
        """Return whether this unit should release the lock."""
        # Either the unit went idle while still holding the grant, or it already
        # executed under the current grant and must not run again before releasing.
        return (self.is_peer_managed() and self.is_completed(lock)) or self._executed_after_grant(
            lock
        )

    def is_waiting(self) -> bool:
        """Return whether this unit is waiting for a fresh grant."""
        return self.is_peer_managed() and self.intent == LockIntent.REQUEST

    def is_waiting_retry(self) -> bool:
        """Return whether this unit is waiting for a retry after releasing."""
        return self.is_peer_managed() and self.intent == LockIntent.RETRY_RELEASE

    def is_retry_hold(self) -> bool:
        """Return whether this unit wants to retry while keeping priority."""
        return self.is_peer_managed() and self.intent == LockIntent.RETRY_HOLD

    def is_retry(self, lock: PeerAppLock) -> bool:
        """Return whether this unit is in a retry state and currently granted."""
        return (
            self.is_peer_managed()
            and self.intent
            in {
                LockIntent.RETRY_RELEASE,
                LockIntent.RETRY_HOLD,
            }
            and lock.is_granted(self.unit.name)
        )

    def is_completed(self, lock: PeerAppLock) -> bool:
        """Return whether this unit completed and still holds the grant."""
        return (
            self.is_peer_managed()
            and self.intent == LockIntent.IDLE
            and lock.is_granted(self.unit.name)
        )

    def requested_at(self) -> datetime | None:
        """Return the timestamp of the current operation request, if any."""
        operation = self.get_current()
        return operation.requested_at if operation is not None else None

    def _executed_after_grant(self, lock: PeerAppLock) -> bool:
        """Return whether execution happened after the current grant."""
        granted_at = lock.granted_at
        executed_at = self.executed_at
        if granted_at is None or executed_at is None:
            return False
        return executed_at > granted_at

    def mirror_result(self, op_id: str, result: OperationResult) -> None:
        """Apply an execution result to the mirrored peer queue.

        This keeps the peer copy aligned with the backend that actually executed
        the operation.

        Args:
            op_id: identifier of the operation that was finalized.
            result: outcome reported by the executing backend.

        Raises:
            RollingOpsDecodingError: if there is an inconsistency found.
        """
        data = self._unit_data
        current = data.queue.peek()

        if current is None:
            logger.warning('Cannot mirror finalized operation: peer queue is empty.')
            raise RollingOpsDecodingError('Inconsistent operation found.')

        if current.op_id != op_id:
            logger.warning(
                'Cannot mirror finalized operation: peer head op_id=%s '
                'does not match finalized op_id=%s.',
                current.op_id,
                op_id,
            )
            raise RollingOpsDecodingError('Inconsistent operation found.')

        self._apply_result_to_data(data, result)
        self._save(data)


def iter_peer_units(model: Model, relation_name: str) -> Iterator[Unit]:
    """Yield all units currently participating in the peer relation, including self.

    Raises:
        RollingOpsNoRelationError: if the relation does not exist (yet).
    """
    relation = model.get_relation(relation_name)
    if relation is None:
        raise RollingOpsNoRelationError()

    # relation.units does not include this unit itself, so add it explicitly.
    units = set(relation.units)
    units.add(model.unit)

    yield from units


def pick_oldest_completed(operations_list: list[PeerUnitOperations]) -> str | None:
    """Return the name of the unit with the oldest executed_at timestamp."""
    selected = None
    oldest = None

    for operations in operations_list:
        timestamp = operations.executed_at
        if timestamp is None:
            continue
        if oldest is None or timestamp < oldest:
            oldest = timestamp
            selected = operations

    return selected.unit.name if selected is not None else None
def pick_oldest_request(operations_list: list[PeerUnitOperations]) -> str | None:
    """Return the name of the unit with the oldest head operation."""
    selected = None
    oldest = None

    for operations in operations_list:
        timestamp = operations.requested_at()
        if timestamp is None:
            continue
        if oldest is None or timestamp < oldest:
            oldest = timestamp
            selected = operations

    return selected.unit.name if selected is not None else None


# --- peer/_rollingops.py: background worker entry point -----------------------

"""Background process."""

import argparse
import time

from charmlibs.rollingops.common._utils import dispatch_lock_granted, setup_logging
from charmlibs.rollingops.peer._worker import PEER_LOG_FILENAME


def main():
    """Juju hook event dispatcher.

    Parses the worker CLI arguments, configures file logging, then triggers
    the lock-granted dispatch for the given unit.
    """
    parser = argparse.ArgumentParser(description='RollingOps peer worker')
    parser.add_argument(
        '--unit-name',
        type=str,
        required=True,
        help='Juju unit name (e.g. app/0)',
    )
    parser.add_argument(
        '--charm-dir',
        type=str,
        required=True,
        help='Path to the charm directory',
    )
    args = parser.parse_args()
    setup_logging(PEER_LOG_FILENAME, unit_name=args.unit_name)

    # Sleep so that the leader unit can properly leave the hook and start a new one
    time.sleep(10)

    dispatch_lock_granted(args.unit_name, args.charm_dir)


if __name__ == '__main__':
    main()


# --- peer/_worker.py: async worker process manager ----------------------------

"""Peer rolling ops. Spawns and manages the external rolling-ops worker process."""

import logging

from ops import RelationDataContent
from ops.charm import (
    CharmBase,
)

from charmlibs import pathops
from charmlibs.rollingops.common._base_worker import BaseRollingOpsAsyncWorker

logger = logging.getLogger(__name__)

PEER_LOG_FILENAME = '/var/log/peer_rollingops_worker.log'


class PeerRollingOpsAsyncWorker(BaseRollingOpsAsyncWorker):
    """Manage the peer-backed rolling-ops worker process.

    The worker state is coordinated through the peer relation application
    databag to ensure that it remains accessible across leadership
    changes. This guarantees that a newly elected leader can detect,
    stop, or restart an existing worker process as needed.
    """

    # Databag key under which the worker PID is stored.
    _pid_field = 'peer-rollingops-worker-pid'
    _log_filename = PEER_LOG_FILENAME

    def __init__(self, charm: CharmBase, relation_name: str):
        """Initialize the worker manager against the given peer relation."""
        super().__init__(charm, 'peer-rollingops-async-worker', relation_name)

    @property
    def _app_data(self) -> RelationDataContent:
        """Return the application databag in the peer relation."""
        return self._relation.data[self.model.app]  # type: ignore[reportOptionalMemberAccess]

    def _worker_script_path(self) -> pathops.LocalPath:
        """Return the path to the peer rolling-ops worker script.

        This script is executed in a background process to handle operation
        processing for the peer backend.
        """
        return pathops.LocalPath(
            self._venv_site_packages() / 'charmlibs' / 'rollingops' / 'peer' / '_rollingops.py'
        )

    @property
    def _pid(self) -> int | None:
        """Return the stored worker process PID.

        The PID is persisted in the application databag of the peer relation.

        Returns:
            The worker process PID, or None if not set.
        """
        if self._relation is None:
            return None
        pid = self._app_data.get(self._pid_field, '')

        # Treat an empty or malformed value as "no worker recorded".
        try:
            pid = int(pid)
        except (ValueError, TypeError):
            pid = None

        return pid

    @_pid.setter
    def _pid(self, value: int | None) -> None:
        """Persist the worker process PID in the peer relation databag.

        The PID is stored in the application databag because it is used
        to trigger rolling operations on the leader and the leader may change.

        Args:
            value: The process identifier to store.
        """
        if self._relation is None:
            return
        self._app_data.update({self._pid_field: '' if value is None else str(value)})

    def _on_existing_worker(self, pid: int) -> bool:
        """Handle the presence of an already running worker process.

        When an existing worker is detected, it is stopped before starting a
        new one to ensure a single active worker per application.

        Args:
            pid: The PID of the currently running worker.

        Returns:
            True to indicate that the existing worker was handled and a new
            worker can be started.
        """
        logger.info('Stopping existing RollingOps worker PID %s before restart.', pid)
        self.stop()
        return True
+ type: integer + default: 60 diff --git a/rollingops/tests/integration/charms/common.py b/rollingops/tests/integration/charms/common.py index aba0cdb40..a098677c7 100644 --- a/rollingops/tests/integration/charms/common.py +++ b/rollingops/tests/integration/charms/common.py @@ -30,6 +30,7 @@ from charmlibs.rollingops import ( OperationResult, RollingOpsManager, + SyncLockBackend, ) logger = logging.getLogger(__name__) @@ -38,8 +39,16 @@ def _now_timestamp_str() -> str: - """UTC timestamp as a string using ISO 8601 format.""" - return datetime.now(UTC).isoformat() + """UTC timestamp as a epoch.""" + return str(datetime.now(UTC).timestamp()) + + +class MySyncBackend(SyncLockBackend): + def acquire(self, timeout: int | None) -> None: + logger.info('acquiring sync lock') + + def release(self) -> None: + logger.info('releasing sync lock') class Charm(CharmBase): @@ -59,11 +68,15 @@ def __init__(self, framework: Framework): etcd_relation_name='etcd', cluster_id='cluster-12345', callback_targets=callback_targets, + sync_lock_targets={ + 'stop': MySyncBackend, + }, ) self.framework.observe(self.on.restart_action, self._on_restart_action) self.framework.observe(self.on.failed_restart_action, self._on_failed_restart_action) self.framework.observe(self.on.deferred_restart_action, self._on_deferred_restart_action) + self.framework.observe(self.on.sync_restart_action, self._on_sync_restart_action) def _restart(self, delay: int = 0) -> None: self._record_transition('_restart:start', delay=delay) @@ -119,12 +132,37 @@ def _on_deferred_restart_action(self, event: ActionEvent) -> None: max_retry=max_retry, ) + def _on_sync_restart_action(self, event: ActionEvent): + self.model.unit.status = WaitingStatus('Awaiting _sync_restart operation') + timeout = event.params.get('timeout', 60) + delay = event.params.get('delay') + self._record_transition('action:sync-restart', delay=delay, timeout=timeout) + + try: + with self.restart_manager.acquire_sync_lock(backend_id='stop', 
timeout=timeout): + self._record_transition('_sync_restart:start', delay=delay, timeout=timeout) + logger.info('Executing _sync_restart.') + self.model.unit.status = MaintenanceStatus('Executing _sync_restart operation') + time.sleep(int(event.params.get('delay', 0))) + self.model.unit.status = ActiveStatus('') + logger.info('Finished _sync_restart.') + self._record_transition('_sync_restart:done', delay=delay, timeout=timeout) + return + except TimeoutError: + self._record_transition('_sync_restart:timeout', delay=delay, timeout=timeout) + event.fail('Timed out acquiring sync lock') + def _record_transition(self, name: str, **data: Any) -> None: TRACE_FILE.parent.mkdir(parents=True, exist_ok=True) + state = self.restart_manager.state payload = { 'ts': _now_timestamp_str(), 'unit': self.model.unit.name, 'event': name, + 'rollingops_status': state.status.value if state.status else None, + 'processing_backend': state.processing_backend.value + if state.processing_backend + else None, **data, } with TRACE_FILE.open('a', encoding='utf-8') as f: diff --git a/rollingops/tests/integration/test_etcd_rolling_ops.py b/rollingops/tests/integration/test_etcd_rolling_ops.py index d17153ca9..4172d416e 100644 --- a/rollingops/tests/integration/test_etcd_rolling_ops.py +++ b/rollingops/tests/integration/test_etcd_rolling_ops.py @@ -15,23 +15,37 @@ """Integration tests using real Juju and pre-packed charm(s).""" import logging +import time from pathlib import Path import jubilant import pytest from tenacity import retry, stop_after_delay, wait_fixed -from tests.integration.utils import get_unit_events, remove_transition_file +from tests.integration.utils import ( + get_unit_events, + is_empty_file, + parse_ts, + remove_transition_file, +) -TRACE_FILE = '/var/lib/charm-rolling-ops/transitions.log' logger = logging.getLogger(__name__) TIMEOUT = 15 * 60.0 +ETCD_PROCESS_LOGS = '/var/log/etcd_rollingops_worker.log' +PEER_PROCCES_LOGS = '/var/log/peer_rollingops_worker.log' 
+ETCD_CONFIG_FILE = '/var/lib/rollingops/etcd/etcdctl.json' -@retry(wait=wait_fixed(10), stop=stop_after_delay(60), reraise=True) -def wait_for_etcdctl_env(juju: jubilant.Juju, unit: str) -> None: - task = juju.exec('test -f /var/lib/rollingops/etcd/etcdctl.json', unit=unit) +def etcdctl_file_exits(juju: jubilant.Juju, unit: str) -> bool: + task = juju.exec(f'test -f {ETCD_CONFIG_FILE}', unit=unit) if task.status != 'completed' or task.return_code != 0: + return False + return True + + +@retry(wait=wait_fixed(10), stop=stop_after_delay(60), reraise=True) +def wait_for_etcdctl_config_file(juju: jubilant.Juju, unit: str) -> None: + if not etcdctl_file_exits(juju, unit): raise RuntimeError('etcdctl config file not ready') @@ -41,9 +55,7 @@ def test_deploy(juju: jubilant.Juju, app_name: str): @pytest.mark.machine_only -def test_restart_action_one_unit(juju: jubilant.Juju, app_name: str): - """Verify that restart action runs through the expected workflow.""" - +def test_charm_is_integrated_with_etcd(juju: jubilant.Juju, app_name: str): juju.deploy( 'self-signed-certificates', app='self-signed-certificates', @@ -65,68 +77,262 @@ def test_restart_action_one_unit(juju: jubilant.Juju, app_name: str): juju.integrate(f'{app_name}:etcd', 'etcd:etcd-client') juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) - wait_for_etcdctl_env(juju, f'{app_name}/0') + wait_for_etcdctl_config_file(juju, f'{app_name}/0') - juju.run(f'{app_name}/0', 'restart', {'delay': 1}, wait=300) - juju.wait( - jubilant.all_active, - error=jubilant.any_error, - timeout=TIMEOUT, - ) +@pytest.mark.machine_only +def test_restart_action_one_unit_single_app(juju: jubilant.Juju, app_name: str): + unit = f'{app_name}/0' - events = get_unit_events(juju, f'{app_name}/0') - restart_events = [e['event'] for e in events] + juju.run(unit, 'restart', {'delay': 1}, wait=TIMEOUT) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + events = get_unit_events(juju, unit) + 
restart_events = [ + (e['event'], e['processing_backend']) + for e in events + if not e['event'].startswith('action') + ] expected = [ - 'action:restart', - '_restart:start', - '_restart:done', + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ] + + assert restart_events == expected, f'unexpected event order: {restart_events}' + assert not is_empty_file(juju, unit, ETCD_PROCESS_LOGS) + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_failed_restart_retries_one_unit_single_app(juju: jubilant.Juju, app_name: str): + unit = f'{app_name}/0' + remove_transition_file(juju, unit) + + juju.run(unit, 'failed-restart', {'delay': 1, 'max-retry': 1}) + juju.run(unit, 'restart', {'delay': 1}) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + events = get_unit_events(juju, unit) + restart_events = [ + (e['event'], e['processing_backend']) + for e in events + if not e['event'].startswith('action') ] - assert expected == restart_events + expected = [ + ('_failed_restart:start', 'etcd'), # attempt 0 + ('_failed_restart:retry_release', 'etcd'), + ('_failed_restart:start', 'etcd'), # retry 1 + ('_failed_restart:retry_release', 'etcd'), + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ] + assert restart_events == expected, f'unexpected event order: {restart_events}' + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_assert_deferred_restart_retries_one_unit_single_app(juju: jubilant.Juju, app_name: str): + unit = f'{app_name}/0' + remove_transition_file(juju, unit) + + juju.run(unit, 'deferred-restart', {'delay': 1, 'max-retry': 1}, wait=TIMEOUT) + juju.run(unit, 'restart', {'delay': 1}) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + events = get_unit_events(juju, unit) + restart_events = [ + (e['event'], e['processing_backend']) + for e in events + if not e['event'].startswith('action') + ] + + expected = [ + 
('_deferred_restart:start', 'etcd'), # attempt 0 + ('_deferred_restart:retry_hold', 'etcd'), + ('_deferred_restart:start', 'etcd'), # retry 1 + ('_deferred_restart:retry_hold', 'etcd'), + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ] + assert restart_events == expected, f'unexpected event order: {restart_events}' + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_assert_restart_rolls_one_unit_at_a_time_single_app(juju: jubilant.Juju, app_name: str): + juju.add_unit(app=app_name, num_units=4) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + status = juju.status() + units = sorted(status.apps[app_name].units) + for unit in units: + remove_transition_file(juju, unit) + + for unit in units: + juju.run(unit, 'restart', {'delay': 15}) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + all_events: list[dict[str, str]] = [] + for unit in units: + events = get_unit_events(juju, unit) + assert len(events) == 3 + all_events.extend(events) + + restart_events = [e for e in all_events if not e['event'].startswith('action')] + restart_events.sort(key=parse_ts) + + logger.info(restart_events) + + assert len(restart_events) == len(units) * 2 + for i in range(0, len(restart_events), 2): + start_event = restart_events[i] + done_event = restart_events[i + 1] + + assert start_event['event'] == '_restart:start' + assert done_event['event'] == '_restart:done' + assert start_event['unit'] == done_event['unit'] + assert start_event['processing_backend'] == 'etcd' + assert done_event['processing_backend'] == 'etcd' + for unit in units: + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) @pytest.mark.machine_only -def test_all_units_can_connect_to_etcd(juju: jubilant.Juju, app_name: str): - juju.add_unit(app_name, num_units=2) +def test_retry_hold_operation_two_units_single_app(juju: jubilant.Juju, app_name: str): + status = juju.status() + units = 
sorted(status.apps[app_name].units) + + for unit in units: + remove_transition_file(juju, unit) + + unit_a = units[1] + unit_b = units[3] + + juju.run(unit_a, 'deferred-restart', {'delay': 15, 'max-retry': 2}, wait=TIMEOUT) + juju.run(unit_b, 'restart', {'delay': 2}, wait=TIMEOUT) + juju.wait( - lambda status: jubilant.all_active(status, app_name), + lambda status: status.apps[app_name].units[unit_b].is_active, error=jubilant.any_error, timeout=TIMEOUT, ) + all_events: list[dict[str, str]] = [] + all_events.extend(get_unit_events(juju, unit_a)) + all_events.extend(get_unit_events(juju, unit_b)) + all_events.sort(key=parse_ts) + + logger.info(all_events) + + relevant_events = [e for e in all_events if not e['event'].startswith('action')] + sequence = [(e['unit'], e['event'], e['processing_backend']) for e in relevant_events] + + logger.info(sequence) + + assert sequence == [ + (unit_a, '_deferred_restart:start', 'etcd'), # attempt 0 + (unit_a, '_deferred_restart:retry_hold', 'etcd'), + (unit_a, '_deferred_restart:start', 'etcd'), # retry 1 + (unit_a, '_deferred_restart:retry_hold', 'etcd'), + (unit_a, '_deferred_restart:start', 'etcd'), # retry 2 + (unit_a, '_deferred_restart:retry_hold', 'etcd'), + (unit_b, '_restart:start', 'etcd'), + (unit_b, '_restart:done', 'etcd'), + ], f'unexpected event sequence: {sequence}' + + for unit in units: + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_retry_release_two_units_single_app(juju: jubilant.Juju, app_name: str): status = juju.status() units = sorted(status.apps[app_name].units) - for unit in units: remove_transition_file(juju, unit) + unit_a = units[2] + unit_b = units[4] + + juju.run(unit_a, 'failed-restart', {'delay': 10, 'max-retry': 2}, wait=TIMEOUT) + juju.run(unit_b, 'failed-restart', {'delay': 15, 'max-retry': 2}, wait=TIMEOUT) + + time.sleep( + 60 * 3 + ) # wait for operation execution. TODO: in charm use lock state to clear status. 
+ + all_events: list[dict[str, str]] = [] + all_events.extend(get_unit_events(juju, unit_a)) + all_events.extend(get_unit_events(juju, unit_b)) + all_events.sort(key=parse_ts) + + restart_events = [e for e in all_events if not e['event'].startswith('action')] + restart_events.sort(key=parse_ts) + + logger.info(restart_events) + + assert len(restart_events) == 2 * 2 * 3 # 2 units * 2 events * 3 executions + for i in range(0, len(restart_events), 2): + start_event = restart_events[i] + done_event = restart_events[i + 1] + + assert start_event['event'] == '_failed_restart:start' + assert done_event['event'] == '_failed_restart:retry_release' + assert start_event['unit'] == done_event['unit'] + assert start_event['processing_backend'] == 'etcd' + assert done_event['processing_backend'] == 'etcd' + + for unit in units: + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_subsequent_lock_request_ops_single_app(juju: jubilant.Juju, app_name: str): + status = juju.status() + units = sorted(status.apps[app_name].units) for unit in units: - juju.run(unit, 'restart', {'delay': 2}, wait=300) + remove_transition_file(juju, unit) + + unit_a = units[3] + + juju.run(unit_a, 'deferred-restart', {'delay': 1, 'max-retry': 1}) + for _ in range(3): + juju.run(unit_a, 'failed-restart', {'delay': 1, 'max-retry': 0}) + juju.run(unit_a, 'restart', {'delay': 1}) juju.wait( - lambda status: jubilant.all_active(status, app_name, 'etcd', 'self-signed-certificates'), + lambda status: status.apps[app_name].units[unit_a].is_active, error=jubilant.any_error, timeout=TIMEOUT, ) - expected = [ - 'action:restart', - '_restart:start', - '_restart:done', + unit_a_events = get_unit_events(juju, unit_a) + relevant_events = [ + (e['event'], e['processing_backend']) + for e in unit_a_events + if not e['event'].startswith('action') ] + logger.info('unit_a_events %s', unit_a_events) + + assert relevant_events == [ + ('_deferred_restart:start', 'etcd'), # attempt 0 + 
('_deferred_restart:retry_hold', 'etcd'), + ('_deferred_restart:start', 'etcd'), # retry 1 + ('_deferred_restart:retry_hold', 'etcd'), + ('_failed_restart:start', 'etcd'), # attempt 0 + ('_failed_restart:retry_release', 'etcd'), + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ], f'unexpected event sequence: {relevant_events}' for unit in units: - events = get_unit_events(juju, unit) - restart_events = [e['event'] for e in events] - assert restart_events == expected + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) @pytest.mark.machine_only -def test_all_units_can_connect_to_etcd_multi_app(juju: jubilant.Juju, charm: Path, app_name: str): +def test_rolling_ops_multi_app(juju: jubilant.Juju, charm: Path, app_name: str): second_app = f'{app_name}-secondary' - juju.deploy(charm, app=second_app, num_units=3) juju.wait( lambda status: jubilant.all_active(status, second_app), @@ -136,9 +342,7 @@ def test_all_units_can_connect_to_etcd_multi_app(juju: jubilant.Juju, charm: Pat juju.integrate(f'{second_app}:etcd', 'etcd:etcd-client') juju.wait( - lambda status: jubilant.all_active( - status, app_name, second_app, 'etcd', 'self-signed-certificates' - ), + lambda status: jubilant.all_active(status, second_app, 'etcd'), error=jubilant.any_error, timeout=TIMEOUT, ) @@ -149,32 +353,169 @@ def test_all_units_can_connect_to_etcd_multi_app(juju: jubilant.Juju, charm: Pat for unit in all_units: remove_transition_file(juju, unit) + wait_for_etcdctl_config_file(juju, unit) for unit in all_units: - wait_for_etcdctl_env(juju, unit) + juju.run(unit, 'restart', {'delay': 10}, wait=TIMEOUT) + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + all_events: list[dict[str, str]] = [] for unit in all_units: - juju.run(unit, 'restart', {'delay': 2}, wait=300) + events = get_unit_events(juju, unit) + assert len(events) == 3 + all_events.extend(events) - juju.wait( - lambda status: jubilant.all_active( - status, - app_name, - second_app, - 'etcd', - 
'self-signed-certificates', - ), - error=jubilant.any_error, - timeout=TIMEOUT, - ) + restart_events = [e for e in all_events if not e['event'].startswith('action')] + restart_events.sort(key=parse_ts) - expected = [ - 'action:restart', - '_restart:start', - '_restart:done', - ] + logger.info(restart_events) + + assert len(restart_events) == len(all_units) * 2 + for i in range(0, len(restart_events), 2): + start_event = restart_events[i] + done_event = restart_events[i + 1] + + assert start_event['event'] == '_restart:start' + assert done_event['event'] == '_restart:done' + assert start_event['unit'] == done_event['unit'] + assert start_event['processing_backend'] == 'etcd' + assert done_event['processing_backend'] == 'etcd' for unit in all_units: + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_rolling_ops_sync_lock_multi_app(juju: jubilant.Juju, app_name: str): + second_app = f'{app_name}-secondary' + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + primary_units = sorted(juju.status().apps[app_name].units.keys()) + secondary_units = sorted(juju.status().apps[second_app].units.keys()) + all_units: list[str] = primary_units + secondary_units + + for unit in all_units: + remove_transition_file(juju, unit) + wait_for_etcdctl_config_file(juju, unit) + + unit_a = primary_units[1] + unit_b = secondary_units[1] + + juju.cli('run', unit_a, 'sync-restart', 'delay=15', '--background') + time.sleep(2) + juju.cli('run', unit_b, 'sync-restart', 'delay=15', '--background') + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + all_events: list[dict[str, str]] = [] + + for unit in {unit_a, unit_b}: events = get_unit_events(juju, unit) - restart_events = [e['event'] for e in events] - assert restart_events == expected + assert len(events) == 3 + all_events.extend(events) + + all_events.sort(key=parse_ts) + restart_events = [ + (e['unit'], e['event'], e['processing_backend']) + 
for e in all_events + if not e['event'].startswith('action') + ] + + logger.info(restart_events) + + assert restart_events == [ + (unit_a, '_sync_restart:start', 'etcd'), + (unit_a, '_sync_restart:done', 'etcd'), + (unit_b, '_sync_restart:start', 'etcd'), + (unit_b, '_sync_restart:done', 'etcd'), + ], f'unexpected event sequence: {restart_events}' + + for unit in all_units: + assert is_empty_file(juju, unit, PEER_PROCCES_LOGS) + + +@pytest.mark.machine_only +def test_lock_released_when_unit_removed(juju: jubilant.Juju, app_name: str) -> None: + units = sorted(juju.status().apps[app_name].units.keys()) + for unit in units: + remove_transition_file(juju, unit) + unit_a = units[1] + unit_b = units[2] + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + juju.run(unit_a, 'deferred-restart', {'delay': 15}) + time.sleep(5) + juju.run(unit_b, 'restart', {'delay': 2}) + + juju.remove_unit(unit_a) + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + unit_b_events = get_unit_events(juju, unit_b) + relevant_events = [ + (e['event'], e['processing_backend']) + for e in unit_b_events + if not e['event'].startswith('action') + ] + + logger.info('unit_b_events %s', unit_b_events) + + assert relevant_events == [ + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ], f'unexpected event sequence: {relevant_events}' + + +@pytest.mark.machine_only +def test_actions_still_work_after_etcd_relation_removed( + juju: jubilant.Juju, app_name: str +) -> None: + second_app = f'{app_name}-secondary' + primary_units = sorted(juju.status().apps[app_name].units.keys()) + secondary_units = sorted(juju.status().apps[second_app].units.keys()) + all_units: list[str] = primary_units + secondary_units + + for unit in all_units: + remove_transition_file(juju, unit) + wait_for_etcdctl_config_file(juju, unit) + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + unit_a = primary_units[3] + + juju.run(unit_a, 
'failed-restart', {'delay': 10, 'max-retry': 1}) + juju.run(unit_a, 'restart', {'delay': 1}) + juju.run(unit_a, 'restart', {'delay': 2}) + + juju.remove_relation(f'{app_name}:etcd', 'etcd:etcd-client') + + unit_b = secondary_units[1] + juju.run(unit_b, 'restart', {'delay': 1}) + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + unit_a_events = get_unit_events(juju, unit_a) + relevant_events = [e['event'] for e in unit_a_events if not e['event'].startswith('action')] + + logger.info('unit_a_events %s', unit_a_events) + + assert relevant_events.count('_failed_restart:start') == 2, relevant_events + assert relevant_events.count('_failed_restart:retry_release') == 2, relevant_events + assert relevant_events.count('_restart:start') == 2, relevant_events + assert relevant_events.count('_restart:done') == 2, relevant_events + + unit_b_events = get_unit_events(juju, unit_b) + assert len(unit_b_events) == 3 + restart_events = [ + (e['event'], e['processing_backend']) + for e in unit_b_events + if not e['event'].startswith('action') + ] + + assert restart_events == [ + ('_restart:start', 'etcd'), + ('_restart:done', 'etcd'), + ], f'unexpected event sequence: {restart_events}' diff --git a/rollingops/tests/integration/test_peer_rolling_ops.py b/rollingops/tests/integration/test_peer_rolling_ops.py index 8178cfe92..dcd527b61 100644 --- a/rollingops/tests/integration/test_peer_rolling_ops.py +++ b/rollingops/tests/integration/test_peer_rolling_ops.py @@ -36,8 +36,6 @@ def test_deploy(juju: jubilant.Juju, app_name: str): def test_restart_action_one_unit(juju: jubilant.Juju, app_name: str): - """Verify that restart action runs through the expected workflow.""" - juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) unit = f'{app_name}/0' @@ -55,79 +53,66 @@ def test_restart_action_one_unit(juju: jubilant.Juju, app_name: str): ] assert restart_events == expected, f'unexpected event order: {restart_events}' + assert 
all(e['processing_backend'] == 'peer' for e in events) def test_failed_restart_retries_one_unit(juju: jubilant.Juju, app_name: str): unit = f'{app_name}/0' remove_transition_file(juju, unit) - juju.run(unit, 'failed-restart', {'delay': 1, 'max-retry': 2}, wait=TIMEOUT) - - time.sleep(60) # wait for operation execution. TODO: in charm use lock state to clear status. + juju.run(unit, 'failed-restart', {'delay': 1, 'max-retry': 2}) + juju.run(unit, 'restart', {'delay': 1}) - juju.wait( - lambda status: status.apps[app_name].is_maintenance, - error=jubilant.any_error, - timeout=TIMEOUT, - ) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) events = get_unit_events(juju, unit) - restart_events = [e['event'] for e in events] + restart_events = [e['event'] for e in events if not e['event'].startswith('action')] expected = [ - 'action:failed-restart', '_failed_restart:start', # attempt 0 '_failed_restart:retry_release', '_failed_restart:start', # retry 1 '_failed_restart:retry_release', '_failed_restart:start', # retry 2 '_failed_restart:retry_release', + '_restart:start', + '_restart:done', ] assert restart_events == expected, f'unexpected event order: {restart_events}' + assert all(e['processing_backend'] == 'peer' for e in events) -def test_deferred_restart_retries_one_unit(juju: jubilant.Juju, app_name: str): +def test_assert_deferred_restart_retries_one_unit(juju: jubilant.Juju, app_name: str): unit = f'{app_name}/0' remove_transition_file(juju, unit) juju.run(unit, 'deferred-restart', {'delay': 1, 'max-retry': 2}, wait=TIMEOUT) + juju.run(unit, 'restart', {'delay': 1}) - time.sleep(60) # wait for operation execution. TODO: in charm use lock state to clear status. 
- - juju.wait( - lambda status: status.apps[app_name].is_maintenance, - error=jubilant.any_error, - timeout=TIMEOUT, - ) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) events = get_unit_events(juju, unit) - restart_events = [e['event'] for e in events] + restart_events = [e['event'] for e in events if not e['event'].startswith('action')] expected = [ - 'action:deferred-restart', '_deferred_restart:start', # attempt 0 '_deferred_restart:retry_hold', '_deferred_restart:start', # retry 1 '_deferred_restart:retry_hold', '_deferred_restart:start', # retry 2 '_deferred_restart:retry_hold', + '_restart:start', + '_restart:done', ] assert restart_events == expected, f'unexpected event order: {restart_events}' + assert all(e['processing_backend'] == 'peer' for e in events) -def test_restart_rolls_one_unit_at_a_time(juju: jubilant.Juju, app_name: str): +def test_assert_restart_rolls_one_unit_at_a_time(juju: jubilant.Juju, app_name: str): juju.add_unit(app=app_name, num_units=4) - juju.wait( # TODO: wait for 5 units to be active - lambda status: ( - app_name in status.apps - and len(status.apps[app_name].units) == 5 - and sum(1 for u in status.apps[app_name].units.values() if u.is_active) >= 4 - ), - error=jubilant.any_error, - timeout=TIMEOUT, - ) + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) status = juju.status() units = sorted(status.apps[app_name].units) @@ -160,6 +145,7 @@ def test_restart_rolls_one_unit_at_a_time(juju: jubilant.Juju, app_name: str): assert start_event['unit'] == done_event['unit'], ( f'start/done pair mismatch: {start_event} vs {done_event}' ) + assert all(e['processing_backend'] == 'peer' for e in all_events) def test_retry_hold_keeps_lock_on_same_unit(juju: jubilant.Juju, app_name: str): @@ -172,7 +158,7 @@ def test_retry_hold_keeps_lock_on_same_unit(juju: jubilant.Juju, app_name: str): unit_a = units[1] unit_b = units[3] - juju.run(unit_a, 'deferred-restart', {'delay': 10, 'max-retry': 2}, 
wait=TIMEOUT) + juju.run(unit_a, 'deferred-restart', {'delay': 15, 'max-retry': 2}, wait=TIMEOUT) juju.run(unit_b, 'restart', {'delay': 2}, wait=TIMEOUT) juju.wait( @@ -214,6 +200,7 @@ def test_retry_hold_keeps_lock_on_same_unit(juju: jubilant.Juju, app_name: str): (unit_b, '_restart:start'), (unit_b, '_restart:done'), ], f'unexpected event sequence: {sequence}' + assert all(e['processing_backend'] == 'peer' for e in all_events) def test_retry_release_alternates_execution(juju: jubilant.Juju, app_name: str): @@ -261,6 +248,7 @@ def test_retry_release_alternates_execution(juju: jubilant.Juju, app_name: str): (unit_b, '_failed_restart:start'), # retry 2 (unit_b, '_failed_restart:retry_release'), ], f'unexpected event sequence: {sequence}' + assert all(e['processing_backend'] == 'peer' for e in all_events) def test_subsequent_lock_request_of_different_ops(juju: jubilant.Juju, app_name: str): @@ -303,6 +291,7 @@ def test_subsequent_lock_request_of_different_ops(juju: jubilant.Juju, app_name: '_restart:start', '_restart:done', ], f'unexpected event sequence: {relevant_events}' + assert all(e['processing_backend'] == 'peer' for e in unit_a_events) def test_subsequent_lock_request_of_same_op(juju: jubilant.Juju, app_name: str): @@ -317,7 +306,8 @@ def test_subsequent_lock_request_of_same_op(juju: jubilant.Juju, app_name: str): juju.run(unit_b, 'deferred-restart', {'delay': 10, 'max-retry': 1}) juju.run(unit_a, 'failed-restart', {'delay': 1, 'max-retry': 2}) for _ in range(3): - juju.run(unit_a, 'restart', {'delay': 1}) + juju.run(unit_a, 'deferred-restart', {'delay': 1, 'max-retry': 0}) + juju.run(unit_a, 'restart', {'delay': 1}) juju.wait( lambda status: status.apps[app_name].units[unit_a].is_active, @@ -326,24 +316,49 @@ def test_subsequent_lock_request_of_same_op(juju: jubilant.Juju, app_name: str): ) unit_a_events = get_unit_events(juju, unit_a) - relevant_events = [e['event'] for e in unit_a_events] + relevant_events = [e['event'] for e in unit_a_events if not 
e['event'].startswith('action')] logger.info('unit_a_events %s', unit_a_events) assert relevant_events == [ - 'action:failed-restart', - 'action:restart', - 'action:restart', - 'action:restart', '_failed_restart:start', # attempt 0 '_failed_restart:retry_release', '_failed_restart:start', # retry 1 '_failed_restart:retry_release', '_failed_restart:start', # retry 2 '_failed_restart:retry_release', + '_deferred_restart:start', # attemp 0 + '_deferred_restart:retry_hold', '_restart:start', '_restart:done', ], f'unexpected event sequence: {relevant_events}' + assert all(e['processing_backend'] == 'peer' for e in unit_a_events) + + +def test_sync_lock_is_executed(juju: jubilant.Juju, app_name: str): + status = juju.status() + units = sorted(status.apps[app_name].units) + for unit in units: + remove_transition_file(juju, unit) + + for unit in units: + juju.run(unit, 'sync-restart', {'delay': 1}) + + juju.wait(jubilant.all_active, error=jubilant.any_error, timeout=TIMEOUT) + + expected_events = [ + 'action:sync-restart', + '_sync_restart:start', + '_sync_restart:done', + ] + + # mutually exclusive execution is not guarantee + for unit in units: + events = get_unit_events(juju, unit) + relevant_events = [e['event'] for e in events] + + assert expected_events == relevant_events, f'unexpected event sequence: {relevant_events}' + assert all(e['processing_backend'] == 'peer' for e in events) def test_retry_on_leader_unit_leaves_the_hook(juju: jubilant.Juju, app_name: str): @@ -372,3 +387,4 @@ def test_retry_on_leader_unit_leaves_the_hook(juju: jubilant.Juju, app_name: str '_restart:start', '_restart:done', ], f'unexpected event sequence: {relevant_events}' + assert all(e['processing_backend'] == 'peer' for e in non_leader_events) diff --git a/rollingops/tests/integration/utils.py b/rollingops/tests/integration/utils.py index 795d9d514..bed37564a 100644 --- a/rollingops/tests/integration/utils.py +++ b/rollingops/tests/integration/utils.py @@ -12,13 +12,15 @@ # See the License 
for the specific language governing permissions and # limitations under the License. -"""Integration tests using real Juju and pre-packed charm(s).""" +"""Utils for integration tests.""" import json -from datetime import datetime +from datetime import UTC, datetime import jubilant +from charmlibs import pathops + TRACE_FILE = '/var/lib/charm-rolling-ops/transitions.log' @@ -32,7 +34,7 @@ def get_unit_events(juju: jubilant.Juju, unit: str) -> list[dict[str, str]]: def parse_ts(event: dict[str, str]) -> datetime: - return datetime.fromisoformat(event['ts']) + return datetime.fromtimestamp(float(event['ts']), tz=UTC) def get_leader_unit_name(juju: jubilant.Juju, app: str) -> str: @@ -50,3 +52,13 @@ def get_leader_unit_name(juju: jubilant.Juju, app: str) -> str: def remove_transition_file(juju: jubilant.Juju, unit: str): juju.exec(f'rm -f {TRACE_FILE}', unit=unit) + + +def is_empty_file(juju: jubilant.Juju, unit: str, path: str) -> bool: + pathops_path = pathops.LocalPath(path) + try: + task = juju.exec(f'test ! 
-s {pathops_path}', unit=unit) + except Exception: + return False + + return task.status == 'completed' and task.return_code == 0 diff --git a/rollingops/tests/unit/conftest.py b/rollingops/tests/unit/conftest.py index 74a5e4a60..bb3c46f61 100644 --- a/rollingops/tests/unit/conftest.py +++ b/rollingops/tests/unit/conftest.py @@ -25,17 +25,16 @@ from ops import ActionEvent from ops.testing import Context -import charmlibs.rollingops._certificates as certificates -import charmlibs.rollingops._etcdctl as etcdctl +import charmlibs.rollingops.etcd._certificates as certificates +import charmlibs.rollingops.etcd._etcdctl as etcdctl from charmlibs.interfaces.tls_certificates import ( Certificate, PrivateKey, ) from charmlibs.pathops import LocalPath -from charmlibs.rollingops._manager import EtcdRollingOpsManager -from charmlibs.rollingops._models import SharedCertificate -from charmlibs.rollingops._peer_manager import PeerRollingOpsManager -from charmlibs.rollingops._peer_models import OperationResult +from charmlibs.rollingops import RollingOpsManager +from charmlibs.rollingops.common._models import OperationResult +from charmlibs.rollingops.etcd._models import SharedCertificate VALID_CA_CERT_PEM = """-----BEGIN CERTIFICATE----- MIIC6DCCAdCgAwIBAgIUW42TU9LSjEZLMCclWrvSwAsgRtcwDQYJKoZIhvcNAQEL @@ -136,7 +135,7 @@ def temp_etcdctl(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> types.Modul @pytest.fixture def etcdctl_patch() -> Generator[MagicMock, None, None]: - with patch('charmlibs.rollingops._certificates') as mock_etcdctl: + with patch('charmlibs.rollingops.etcd._certificates') as mock_etcdctl: yield mock_etcdctl @@ -144,11 +143,11 @@ def etcdctl_patch() -> Generator[MagicMock, None, None]: def certificates_manager_patches() -> Generator[dict[str, MagicMock], None, None]: with ( patch( - 'charmlibs.rollingops._certificates._exists', + 'charmlibs.rollingops.etcd._certificates._exists', return_value=False, ), patch( - 'charmlibs.rollingops._certificates.generate', + 
'charmlibs.rollingops.etcd._certificates.generate', return_value=SharedCertificate( certificate=Certificate.from_string(VALID_CLIENT_CERT_PEM), key=PrivateKey.from_string(VALID_CLIENT_KEY_PEM), @@ -156,7 +155,7 @@ def certificates_manager_patches() -> Generator[dict[str, MagicMock], None, None ), ) as mock_generate, patch( - 'charmlibs.rollingops._certificates.persist_client_cert_key_and_ca', + 'charmlibs.rollingops.etcd._certificates.persist_client_cert_key_and_ca', return_value=None, ) as mock_persit, ): @@ -167,26 +166,6 @@ def certificates_manager_patches() -> Generator[dict[str, MagicMock], None, None class RollingOpsCharm(ops.CharmBase): - def __init__(self, framework: ops.Framework): - super().__init__(framework) - - callback_targets = { - '_restart': self.restart, - } - - self.restart_manager = EtcdRollingOpsManager( - charm=self, - peer_relation_name='restart', - etcd_relation_name='etcd', - cluster_id='cluster-12345', - callback_targets=callback_targets, - ) - - def restart(self) -> None: - pass - - -class PeerRollingOpsCharm(ops.CharmBase): def __init__(self, framework: ops.Framework): super().__init__(framework) @@ -196,9 +175,11 @@ def __init__(self, framework: ops.Framework): '_deferred_restart': self._deferred_restart, } - self.restart_manager = PeerRollingOpsManager( + self.restart_manager = RollingOpsManager( charm=self, - relation_name='restart', + peer_relation_name='restart', + etcd_relation_name='etcd', + cluster_id='cluster-12345', callback_targets=callback_targets, ) self.framework.observe(self.on.restart_action, self._on_restart_action) @@ -242,11 +223,6 @@ def charm_test() -> type[RollingOpsCharm]: return RollingOpsCharm -@pytest.fixture -def peer_charm_test() -> type[PeerRollingOpsCharm]: - return PeerRollingOpsCharm - - meta: dict[str, Any] = { 'name': 'charm', 'peers': { @@ -305,9 +281,4 @@ def peer_charm_test() -> type[PeerRollingOpsCharm]: @pytest.fixture def ctx(charm_test: type[RollingOpsCharm]) -> Context[RollingOpsCharm]: - return 
Context(charm_test, meta=meta) - - -@pytest.fixture -def peer_ctx(peer_charm_test: type[PeerRollingOpsCharm]) -> Context[PeerRollingOpsCharm]: - return Context(peer_charm_test, meta=meta, actions=actions) + return Context(charm_test, meta=meta, actions=actions) diff --git a/rollingops/tests/unit/test_common_models.py b/rollingops/tests/unit/test_common_models.py new file mode 100644 index 000000000..fc4072ff6 --- /dev/null +++ b/rollingops/tests/unit/test_common_models.py @@ -0,0 +1,543 @@ +# Copyright 2026 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Learn more about testing at: https://juju.is/docs/sdk/testing + +import json +from datetime import UTC, datetime +from typing import Any + +import pytest + +from charmlibs.rollingops.common._exceptions import RollingOpsDecodingError +from charmlibs.rollingops.common._models import ( + Operation, + OperationQueue, + OperationResult, +) + + +def test_operation_create_sets_fields(): + op = Operation.create('restart', {'b': 2, 'a': 1}, max_retry=3) + + assert op.kwargs == {'b': 2, 'a': 1} + assert op.callback_id == 'restart' + assert op.max_retry == 3 + assert isinstance(op.requested_at, datetime) + + +def test_operation_to_string(): + ts = datetime(2026, 2, 23, 12, 0, 0, 123456, tzinfo=UTC) + op = Operation( + callback_id='cb', + kwargs={'b': 2, 'a': 1}, + requested_at=ts, + max_retry=None, + attempt=0, + result=None, + ) + + s = op.to_string() + expected = ( + '{"callback_id":"cb",' + '"requested_at":"1771848000.123456",' + '"max_retry":null,' + '"attempt":0,' + '"result":null,' + '"kwargs":{"a":1,"b":2}}' + ) + + assert s == expected + + +def test_operation_to_string_zero_max_retry(): + ts = datetime(2026, 2, 23, 4, 0, 0, 123456, tzinfo=UTC) + op = Operation( + callback_id='cb', + kwargs={'b': 2, 'a': 1}, + requested_at=ts, + max_retry=0, + attempt=0, + result=None, + ) + + s = op.to_string() + expected = ( + '{"callback_id":"cb",' + '"requested_at":"1771819200.123456",' + '"max_retry":0,' + '"attempt":0,' + '"result":null,' + '"kwargs":{"a":1,"b":2}}' + ) + assert s == expected + + +def test_operation_to_string_none_max_retry(): + ts = datetime(2026, 2, 23, 4, 0, 0, 123456, tzinfo=UTC) + op = Operation( + callback_id='cb', + kwargs={'b': 2, 'a': 1}, + requested_at=ts, + max_retry=None, + attempt=0, + result=None, + ) + + s = op.to_string() + expected = ( + '{"callback_id":"cb",' + '"requested_at":"1771819200.123456",' + '"max_retry":null,' + '"attempt":0,' + '"result":null,' + '"kwargs":{"a":1,"b":2}}' + ) + + assert s == expected + + +def 
test_operation_is_max_retry_reached_on_zero_max_retry(): + op = Operation.create('restart', {'a': 1, 'b': 2}, max_retry=0) + assert not op.is_max_retry_reached() + op.increase_attempt() + assert op.is_max_retry_reached() + + +def test_operation_equality_and_hash_ignore_timestamp_and_max_retry(): + # Equality only depends on (callback_id, kwargs) + op1 = Operation.create('restart', {'a': 1, 'b': 2}, max_retry=0) + op2 = Operation.create('restart', {'b': 2, 'a': 1}, max_retry=999) + + assert op1 == op2 + assert hash(op1) == hash(op2) + + op3 = Operation.create('restart', {'a': 2}, max_retry=0) + assert op1 != op3 + + +def test_operation_equality_and_hash_empty_arguments(): + # Equality only depends on (callback_id, kwargs) + op1 = Operation.create('restart', {}, max_retry=0) + op2 = Operation.create('restart', {}, max_retry=999) + + assert op1 == op2 + assert hash(op1) == hash(op2) + + op3 = Operation.create('restart', {'a': 2}, max_retry=0) + assert op1 != op3 + + +def test_operation_to_string_and_from_string(): + ts = datetime(2026, 2, 23, 12, 0, 0, 0, tzinfo=UTC) + op1 = Operation( + callback_id='cb', + kwargs={'x': 1, 'y': 'z'}, + requested_at=ts, + max_retry=5, + attempt=0, + result=None, + ) + + s = op1.to_string() + op2 = Operation.from_string(s) + + assert op2.callback_id == op1.callback_id + assert op2.kwargs == op1.kwargs + assert op2.requested_at == op1.requested_at + assert op2.max_retry == op1.max_retry + assert op2.attempt == op1.attempt + + +def test_operation_from_string_valid_payload(): + requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) + payload = json.dumps({ + 'callback_id': 'cb-123', + 'kwargs': {'b': 2, 'a': 'x'}, + 'requested_at': '1773311445.123456', + 'max_retry': '5', + 'attempt': '2', + }) + + op = Operation.from_string(payload) + + assert op is not None + assert op.callback_id == 'cb-123' + assert op.kwargs == {'b': 2, 'a': 'x'} + assert op.requested_at == requested_at + assert op.max_retry == 5 + assert op.attempt == 2 
+ + +def test_from_string_valid_payload_with_empty_kwargs_and_no_max_retry(): + requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) + payload = json.dumps({ + 'callback_id': 'cb-123', + 'requested_at': '1773311445.123456', + 'attempt': '0', + }) + + op = Operation.from_string(payload) + + assert op is not None + assert op.callback_id == 'cb-123' + assert op.kwargs == {} + assert op.requested_at == requested_at + assert op.max_retry is None + assert op.attempt == 0 + + +def test_from_string_valid_payload_with_empty_kwargs_and_0_max_retry(): + requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) + payload = json.dumps({ + 'callback_id': 'cb-123', + 'kwargs': {}, + 'requested_at': '1773311445.123456', + 'max_retry': '0', + 'attempt': '0', + }) + + op = Operation.from_string(payload) + + assert op is not None + assert op.callback_id == 'cb-123' + assert op.kwargs == {} + assert op.requested_at == requested_at + assert op.max_retry == 0 + assert op.attempt == 0 + + +@pytest.mark.parametrize( + 'payload', + [ + '{not valid json', + json.dumps( # invalid requested_at + { + 'callback_id': 'cb-123', + 'kwargs': {'x': 1}, + 'requested_at': 'bad-ts', + 'max_retry': '3', + 'attempt': '1', + } + ), + json.dumps( # invalid kwargs + { + 'callback_id': 'cb-123', + 'kwargs': '{bad kwargs json', + 'requested_at': '1773311445.123456', + 'max_retry': '3', + 'attempt': '1', + } + ), + json.dumps( # missing callback_id + { + 'kwargs': {'x': 1}, + 'requested_at': '1773311445.123456', + 'max_retry': '3', + 'attempt': '1', + } + ), + json.dumps( # invalid kwargs + { + 'callback_id': 'cb-123', + 'kwargs': '[]', + 'requested_at': '1773311445.123456', + 'max_retry': '3', + 'attempt': '1', + } + ), + json.dumps( # missing requested_at + { + 'callback_id': 'cb-123', + 'kwargs': {}, + 'requested_at': '', + 'max_retry': '3', + 'attempt': '1', + } + ), + json.dumps( # result + { + 'callback_id': 'cb-123', + 'kwargs': {}, + 'requested_at': 'bad-ts', + 'max_retry': 
'3', + 'attempt': '1', + 'result': 'something', + } + ), + ], +) +def test_operation_from_string_invalid_inputs_return_none(payload: Any): + with pytest.raises(RollingOpsDecodingError, match='Failed to deserialize'): + Operation.from_string(payload) + + +def test_op_id_returns_timestamp_and_callback_id() -> None: + requested_at = datetime(2025, 1, 2, 3, 4, 5) + operation = Operation( + callback_id='restart', + kwargs={'delay': 2}, + requested_at=requested_at, + max_retry=3, + attempt=0, + result=None, + ) + + assert operation.op_id == f'{requested_at.timestamp()}-restart' + + +def test_complete_increments_attempt_and_sets_release() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=3, + attempt=0, + result=None, + ) + + operation.complete() + + assert operation.attempt == 1 + assert operation.result == OperationResult.RELEASE + + +def test_retry_hold_sets_retry_hold_when_max_retry_not_reached() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=3, + attempt=0, + result=None, + ) + + operation.retry_hold() + + assert operation.attempt == 1 + assert operation.result == OperationResult.RETRY_HOLD + + +def test_retry_hold_sets_release_when_max_retry_reached() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=0, + attempt=0, + result=None, + ) + + operation.retry_hold() + + assert operation.attempt == 1 + assert operation.result == OperationResult.RELEASE + + +def test_retry_release_sets_retry_release_when_max_retry_not_reached() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=3, + attempt=0, + result=None, + ) + + operation.retry_release() + + assert operation.attempt == 1 + assert operation.result == OperationResult.RETRY_RELEASE + + +def 
test_retry_release_sets_release_when_max_retry_reached() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=0, + attempt=0, + result=None, + ) + + operation.retry_release() + + assert operation.attempt == 1 + assert operation.result == OperationResult.RELEASE + + +def test_retry_hold_with_no_max_retry_sets_retry_hold() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=None, + attempt=5, + result=None, + ) + + operation.retry_hold() + + assert operation.attempt == 6 + assert operation.result == OperationResult.RETRY_HOLD + + +def test_retry_release_with_no_max_retry_sets_retry_release() -> None: + operation = Operation( + callback_id='restart', + kwargs={}, + requested_at=datetime(2025, 1, 1, 0, 0, 0), + max_retry=None, + attempt=5, + result=None, + ) + + operation.retry_release() + + assert operation.attempt == 6 + assert operation.result == OperationResult.RETRY_RELEASE + + +def test_queue_empty_behaviour(): + q = OperationQueue() + + assert len(q) == 0 + assert q.empty is True + assert q.peek() is None + assert q.dequeue() is None + + assert q.to_string() == '[]' + + +def test_queue_enqueue_and_fifo_order(): + q = OperationQueue() + op1 = Operation.create('a', {'x': 2}) + op2 = Operation.create('b', {'i': 2}) + q.enqueue(op1) + q.enqueue(op2) + + assert len(q) == 2 + op = q.peek() + assert op is not None + assert op == op1 + + first = q.dequeue() + assert first is not None + assert first == op1 + assert len(q) == 1 + op = q.peek() + assert op is not None + assert op == op2 + + second = q.dequeue() + assert second is not None + assert second == op2 + assert q.empty is True + + +def test_queue_deduplicates_only_against_last_item(): + q = OperationQueue() + op1 = Operation.create('a', {'x': 2}) + op2 = Operation.create('a', {'x': 2}) + op3 = Operation.create('a', {'x': 4}) + + q.enqueue(op1) + assert len(q) == 
1 + + q.enqueue(op2) + assert len(q) == 1 + + q.enqueue(op3) + assert len(q) == 2 + + q.enqueue(op2) + assert len(q) == 3 + + +def test_queue_to_string_and_from_string(): + q1 = OperationQueue() + ts1 = datetime(2026, 2, 23, 12, 0, 0, 123456, tzinfo=UTC) + op1 = Operation( + callback_id='a', + kwargs={'x': 1}, + requested_at=ts1, + max_retry=5, + attempt=0, + result=None, + ) + ts2 = datetime(2026, 2, 20, 12, 0, 0, 123456, tzinfo=UTC) + op2 = Operation( + callback_id='b', + kwargs={'y': 'z'}, + requested_at=ts2, + max_retry=None, + attempt=0, + result=None, + ) + q1.enqueue(op1) + q1.enqueue(op2) + + encoded = q1.to_string() + expected = ( + '[{"callback_id":"a",' + '"requested_at":"1771848000.123456",' + '"max_retry":5,' + '"attempt":0,' + '"result":null,' + '"kwargs":{"x":1}},' + '{"callback_id":"b",' + '"requested_at":"1771588800.123456",' + '"max_retry":null,' + '"attempt":0,' + '"result":null,' + '"kwargs":{"y":"z"}}]' + ) + + assert encoded == expected + + q2 = OperationQueue.from_string(encoded) + + assert len(q2) == 2 + op = q2.peek() + assert op is not None + assert op == op1 + + op = q2.dequeue() + assert op is not None + assert op == op1 + + op = q2.dequeue() + assert op is not None + assert op == op2 + assert q2.empty + + +def test_queue_from_string_empty_string_is_empty_queue(): + q = OperationQueue.from_string('') + assert q.empty + assert q.peek() is None + + +def test_queue_from_string_rejects_non_list_json(): + with pytest.raises( + RollingOpsDecodingError, match='Failed to deserialize data to create an OperationQueue' + ): + OperationQueue.from_string('{"not": "a list"}') + + +def test_queue_from_string_rejects_invalid_json(): + with pytest.raises( + RollingOpsDecodingError, match='Failed to deserialize data to create an OperationQueue' + ): + OperationQueue.from_string('{invalid') diff --git a/rollingops/tests/unit/test_certificates.py b/rollingops/tests/unit/test_etcd_certificates.py similarity index 95% rename from 
rollingops/tests/unit/test_certificates.py rename to rollingops/tests/unit/test_etcd_certificates.py index 19dd62853..4984bb6db 100644 --- a/rollingops/tests/unit/test_certificates.py +++ b/rollingops/tests/unit/test_etcd_certificates.py @@ -22,7 +22,7 @@ Certificate, PrivateKey, ) -from charmlibs.rollingops._models import SharedCertificate +from charmlibs.rollingops.etcd._models import SharedCertificate def make_shared_certificate() -> SharedCertificate: @@ -33,7 +33,7 @@ def make_shared_certificate() -> SharedCertificate: ) -def test_certs(): +def test_make_shared_certificate_is_valid(): Certificate.from_string(VALID_CA_CERT_PEM) PrivateKey.from_string(VALID_CLIENT_KEY_PEM) Certificate.from_string(VALID_CLIENT_CERT_PEM) @@ -123,7 +123,7 @@ def test_certificates_manager_generate_does_nothing_when_files_already_exist( temp_certificates.CA_CERT_PATH.write_text(VALID_CA_CERT_PEM) old_certificates = make_shared_certificate() - new_certificates = temp_certificates.generate(common_name='unit-1') + new_certificates = temp_certificates.generate(model_uuid='model', app_name='unit-1') written = SharedCertificate.from_strings( certificate=temp_certificates.CLIENT_CERT_PATH.read_text(), @@ -138,7 +138,7 @@ def test_certificates_manager_generate_does_nothing_when_files_already_exist( def test_certificates_manager_generate_creates_all_files( temp_certificates: Any, ) -> None: - shared = temp_certificates.generate(common_name='unit-1') + shared = temp_certificates.generate(model_uuid='model', app_name='unit-1') assert temp_certificates._exists() is True assert temp_certificates.CA_CERT_PATH.read_text().startswith('-----BEGIN CERTIFICATE-----') diff --git a/rollingops/tests/unit/test_etcdctl.py b/rollingops/tests/unit/test_etcd_etcdctl.py similarity index 97% rename from rollingops/tests/unit/test_etcdctl.py rename to rollingops/tests/unit/test_etcd_etcdctl.py index 051103f2f..26497fa1e 100644 --- a/rollingops/tests/unit/test_etcdctl.py +++ 
b/rollingops/tests/unit/test_etcd_etcdctl.py @@ -21,7 +21,7 @@ import pytest from charmlibs.pathops import LocalPath -from charmlibs.rollingops import RollingOpsEtcdNotConfiguredError +from charmlibs.rollingops.common._exceptions import RollingOpsEtcdNotConfiguredError def test_etcdctl_write_env(temp_etcdctl: Any) -> None: diff --git a/rollingops/tests/unit/test_models.py b/rollingops/tests/unit/test_etcd_models.py similarity index 96% rename from rollingops/tests/unit/test_models.py rename to rollingops/tests/unit/test_etcd_models.py index 2820dfea0..e20ce39b5 100644 --- a/rollingops/tests/unit/test_models.py +++ b/rollingops/tests/unit/test_etcd_models.py @@ -15,7 +15,7 @@ # Learn more about testing at: https://juju.is/docs/sdk/testing -from charmlibs.rollingops._models import RollingOpsKeys +from charmlibs.rollingops.etcd._models import RollingOpsKeys def test_rollingopskeys_paths() -> None: diff --git a/rollingops/tests/unit/test_etcd_rollingops_in_charm.py b/rollingops/tests/unit/test_etcd_rollingops_in_charm.py index de2d0dd56..e668412fd 100644 --- a/rollingops/tests/unit/test_etcd_rollingops_in_charm.py +++ b/rollingops/tests/unit/test_etcd_rollingops_in_charm.py @@ -14,10 +14,11 @@ # # Learn more about testing at: https://juju.is/docs/sdk/testing -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest from ops.testing import Context, PeerRelation, Secret, State +from scenario import RawDataBagContents from scenario.errors import UncaughtCharmError from tests.unit.conftest import ( VALID_CA_CERT_PEM, @@ -30,10 +31,22 @@ Certificate, PrivateKey, ) -from charmlibs.rollingops._models import RollingOpsInvalidSecretContentError, SharedCertificate -from charmlibs.rollingops._relations import ( - CERT_SECRET_FIELD, +from charmlibs.rollingops.common._exceptions import ( + RollingOpsInvalidSecretContentError, ) +from charmlibs.rollingops.common._models import ( + Operation, + OperationQueue, + ProcessingBackend, + 
RollingOpsStatus, +) +from charmlibs.rollingops.etcd._models import SharedCertificate +from charmlibs.rollingops.etcd._relations import CERT_SECRET_FIELD +from charmlibs.rollingops.peer._models import LockIntent + + +def _unit_databag(state: State, peer: PeerRelation) -> RawDataBagContents: + return state.get_relation(peer.id).local_unit_data def test_leader_elected_creates_shared_secret_and_stores_id( @@ -145,3 +158,186 @@ def test_invalid_certificate_secret_content_raises( with pytest.raises(UncaughtCharmError) as exc_info: ctx.run(ctx.on.relation_changed(peer_relation, remote_unit=1), state_in) assert isinstance(exc_info.value.__cause__, RollingOpsInvalidSecretContentError) + + +def test_on_restart_action_lock_fallbacks_to_peer( + ctx: Context[RollingOpsCharm], +): + peer = PeerRelation(endpoint='restart') + state_in = State(leader=False, relations={peer}) + + state_out = ctx.run( + ctx.on.action('restart', params={'delay': 10}), + state_in, + ) + + databag = _unit_databag(state_out, peer) + assert databag['state'] == LockIntent.REQUEST + assert databag['operations'] + assert databag['processing_backend'] == ProcessingBackend.PEER + assert databag['etcd_cleanup_needed'] == 'true' + + q = OperationQueue.from_string(databag['operations']) + assert len(q) == 1 + operation = q.peek() + assert operation is not None + assert operation.callback_id == '_restart' + assert operation.kwargs == {'delay': 10} + assert operation.max_retry is None + assert operation.requested_at is not None + + +def test_state_not_initialized(ctx: Context[RollingOpsCharm]): + state = State(leader=True) + + with ctx(ctx.on.start(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.UNAVAILABLE + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 0 + + +def test_state_peer_idle(ctx: Context[RollingOpsCharm]): + peer_rel = PeerRelation( + endpoint='restart', + local_unit_data={ 
+ 'state': '', + 'operations': '', + 'executed_at': '', + 'processing_backend': 'peer', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, relations={peer_rel}) + + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.IDLE + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 0 + + +def test_state_peer_waiting(ctx: Context[RollingOpsCharm]): + peer_rel = PeerRelation( + endpoint='restart', + local_unit_data={ + 'state': 'request', + 'operations': OperationQueue([ + Operation.create('restart', {'delay': 1}, max_retry=2) + ]).to_string(), + 'executed_at': '', + 'processing_backend': 'peer', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, relations={peer_rel}) + + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.WAITING + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 1 + + +def test_state_peer_is_granted(ctx: Context[RollingOpsCharm]): + peer_rel = PeerRelation( + endpoint='restart', + local_app_data={ + 'granted_unit': f'{ctx.app_name}/0', + }, + local_unit_data={ + 'state': 'retry-release', + 'operations': OperationQueue([ + Operation.create('restart', {'delay': 1}, max_retry=2) + ]).to_string(), + 'executed_at': '2026-04-09T10:01:00+00:00', + 'processing_backend': 'peer', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, relations={peer_rel}) + + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.GRANTED + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 1 + + +def test_state_peer_waiting_retry(ctx: Context[RollingOpsCharm]): + peer_rel 
= PeerRelation( + endpoint='restart', + local_app_data={ + 'granted_unit': 'myapp/0', + }, + local_unit_data={ + 'state': 'retry-release', + 'operations': OperationQueue([ + Operation.create('restart', {'delay': 1}, max_retry=2) + ]).to_string(), + 'executed_at': '2026-04-09T10:01:00+00:00', + 'processing_backend': 'peer', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, relations={peer_rel}) + + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.WAITING + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 1 + + +def test_state_etcd_status(ctx: Context[RollingOpsCharm]): + peer_rel = PeerRelation( + endpoint='restart', + interface='rollingops', + local_app_data={}, + local_unit_data={ + 'state': '', + 'operations': OperationQueue([ + Operation.create('restart', {'delay': 1}, max_retry=2) + ]).to_string(), + 'executed_at': '', + 'processing_backend': 'etcd', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, relations={peer_rel}) + + with patch( + 'charmlibs.rollingops.etcd._backend.EtcdRollingOpsBackend.get_status', + return_value=RollingOpsStatus.GRANTED, + ): + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.GRANTED + assert rolling_state.processing_backend == ProcessingBackend.ETCD + assert len(rolling_state.operations) == 1 + + +def test_state_falls_back_to_peer_if_etcd_status_fails(ctx: Context[RollingOpsCharm]): + peer_rel = PeerRelation( + endpoint='restart', + interface='rollingops', + local_app_data={}, + local_unit_data={ + 'state': 'request', + 'operations': OperationQueue([Operation.create('restart', {'delay': 1})]).to_string(), + 'executed_at': '', + 'processing_backend': 'etcd', + 'etcd_cleanup_needed': 'false', + }, + ) + state = State(leader=False, 
relations={peer_rel}) + + with patch( + 'charmlibs.rollingops._rollingops_manager.EtcdRollingOpsBackend.get_status', + return_value=RollingOpsStatus.UNAVAILABLE, + ): + with ctx(ctx.on.update_status(), state) as mgr: + rolling_state = mgr.charm.restart_manager.state + assert rolling_state.status == RollingOpsStatus.WAITING + assert rolling_state.processing_backend == ProcessingBackend.PEER + assert len(rolling_state.operations) == 1 diff --git a/rollingops/tests/unit/test_peer_models.py b/rollingops/tests/unit/test_peer_models.py deleted file mode 100644 index 201f2a1b5..000000000 --- a/rollingops/tests/unit/test_peer_models.py +++ /dev/null @@ -1,342 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Learn more about testing at: https://juju.is/docs/sdk/testing - -import json -from datetime import UTC, datetime -from typing import Any - -import pytest - -from charmlibs.rollingops._peer_models import ( - Operation, - OperationQueue, - RollingOpsDecodingError, -) - - -def _decode_queue_string(queue_str: str) -> list[dict[str, str]]: - """Helper: decode OperationQueue.to_string() -> list of dicts.""" - items = json.loads(queue_str) - assert isinstance(items, list) - return [json.loads(s) for s in items] # type: ignore[reportUnknownArgumentType] - - -def test_operation_create_sets_fields(): - op = Operation.create('restart', {'b': 2, 'a': 1}, max_retry=3) - - assert op.kwargs == {'b': 2, 'a': 1} - assert op.callback_id == 'restart' - assert op.max_retry == 3 - assert isinstance(op.requested_at, datetime) - - -def test_operation_to_string_contains_string_values_only(): - ts = datetime(2026, 2, 23, 12, 0, 0, 123456, tzinfo=UTC) - op = Operation( - callback_id='cb', kwargs={'b': 2, 'a': 1}, requested_at=ts, max_retry=None, attempt=0 - ) - - s = op.to_string() - obj = json.loads(s) - - assert obj['callback_id'] == 'cb' - assert obj['kwargs'] == '{"a":1,"b":2}' - assert obj['requested_at'] == ts.isoformat() - assert obj.get('max_retry', '') == '' - - -def test_operation_to_string_contains_string_values_only_zero_max_retry(): - ts = datetime(2026, 2, 23, 12, 0, 0, 123456, tzinfo=UTC) - op = Operation( - callback_id='cb', kwargs={'b': 2, 'a': 1}, requested_at=ts, max_retry=0, attempt=0 - ) - - s = op.to_string() - obj = json.loads(s) - - assert obj['callback_id'] == 'cb' - assert obj['kwargs'] == '{"a":1,"b":2}' - assert obj['requested_at'] == ts.isoformat() - assert obj.get('max_retry', '') == '0' - - -def test_operation_is_max_retry_reached_on_zero_max_retry(): - op = Operation.create('restart', {'a': 1, 'b': 2}, max_retry=0) - assert not op.is_max_retry_reached() - op.increase_attempt() - assert op.is_max_retry_reached() - - -def 
test_operation_equality_and_hash_ignore_timestamp_and_max_retry(): - # Equality only depends on (callback_id, kwargs) - op1 = Operation.create('restart', {'a': 1, 'b': 2}, max_retry=0) - op2 = Operation.create('restart', {'b': 2, 'a': 1}, max_retry=999) - - assert op1 == op2 - assert hash(op1) == hash(op2) - - op3 = Operation.create('restart', {'a': 2}, max_retry=0) - assert op1 != op3 - - -def test_operation_equality_and_hash_empty_arguments(): - # Equality only depends on (callback_id, kwargs) - op1 = Operation.create('restart', {}, max_retry=0) - op2 = Operation.create('restart', {}, max_retry=999) - - assert op1 == op2 - assert hash(op1) == hash(op2) - - op3 = Operation.create('restart', {'a': 2}, max_retry=0) - assert op1 != op3 - - -def test_operation_to_string_and_from_string(): - ts = datetime(2026, 2, 23, 12, 0, 0, 0, tzinfo=UTC) - op1 = Operation( - callback_id='cb', kwargs={'x': 1, 'y': 'z'}, requested_at=ts, max_retry=5, attempt=0 - ) - - s = op1.to_string() - op2 = Operation.from_string(s) - - assert op2.callback_id == op1.callback_id - assert op2.kwargs == op1.kwargs - assert op2.requested_at == op1.requested_at - assert op2.max_retry == op1.max_retry - assert op2.attempt == op1.attempt - - -def test_operation_from_string_valid_payload(): - requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) - payload = json.dumps({ - 'callback_id': 'cb-123', - 'kwargs': json.dumps({'b': 2, 'a': 'x'}), - 'requested_at': requested_at.isoformat(), - 'max_retry': '5', - 'attempt': '2', - }) - - op = Operation.from_string(payload) - - assert op is not None - assert op.callback_id == 'cb-123' - assert op.kwargs == {'b': 2, 'a': 'x'} - assert op.requested_at == requested_at - assert op.max_retry == 5 - assert op.attempt == 2 - - -def test_from_string_valid_payload_with_empty_kwargs_and_no_max_retry(): - requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) - payload = json.dumps({ - 'callback_id': 'cb-123', - 'kwargs': '', - 'requested_at': 
requested_at.isoformat(), - 'max_retry': '', - 'attempt': '0', - }) - - op = Operation.from_string(payload) - - assert op is not None - assert op.callback_id == 'cb-123' - assert op.kwargs == {} - assert op.requested_at == requested_at - assert op.max_retry is None - assert op.attempt == 0 - - -def test_from_string_valid_payload_with_empty_kwargs_and_0_max_retry(): - requested_at = datetime(2026, 3, 12, 10, 30, 45, 123456, tzinfo=UTC) - payload = json.dumps({ - 'callback_id': 'cb-123', - 'kwargs': '{}', - 'requested_at': requested_at.isoformat(), - 'max_retry': '0', - 'attempt': '0', - }) - - op = Operation.from_string(payload) - - assert op is not None - assert op.callback_id == 'cb-123' - assert op.kwargs == {} - assert op.requested_at == requested_at - assert op.max_retry == 0 - assert op.attempt == 0 - - -@pytest.mark.parametrize( - 'payload', - [ - '{not valid json', - json.dumps( # invalid requested_at - { - 'callback_id': 'cb-123', - 'kwargs': json.dumps({'x': 1}), - 'requested_at': 'bad-ts', - 'max_retry': '3', - 'attempt': '1', - } - ), - json.dumps( # invalid kwargs - { - 'callback_id': 'cb-123', - 'kwargs': '{bad kwargs json', - 'requested_at': datetime.now(UTC).isoformat(), - 'max_retry': '3', - 'attempt': '1', - } - ), - json.dumps( # missing callback_id - { - 'kwargs': json.dumps({'x': 1}), - 'requested_at': datetime.now(UTC).isoformat(), - 'max_retry': '3', - 'attempt': '1', - } - ), - json.dumps( # invalid kwargs - { - 'callback_id': 'cb-123', - 'kwargs': '[]', - 'requested_at': datetime.now(UTC).isoformat(), - 'max_retry': '3', - 'attempt': '1', - } - ), - json.dumps( # missing requested_at - { - 'callback_id': 'cb-123', - 'kwargs': '{}', - 'requested_at': '', - 'max_retry': '3', - 'attempt': '1', - } - ), - ], -) -def test_operation_from_string_invalid_inputs_return_none(payload: Any): - with pytest.raises(RollingOpsDecodingError, match='Failed to deserialize'): - Operation.from_string(payload) - - -def test_queue_empty_behaviour(): - q = 
OperationQueue() - - assert len(q) == 0 - assert q.empty is True - assert q.peek() is None - assert q.dequeue() is None - - assert json.loads(q.to_string()) == [] - - -def test_queue_enqueue_and_fifo_order(): - q = OperationQueue() - q.enqueue_lock_request('a', {'i': 1}) - q.enqueue_lock_request('b', {'i': 2}) - - assert len(q) == 2 - op = q.peek() - assert op is not None - assert op.callback_id == 'a' - - first = q.dequeue() - assert first is not None - assert first.callback_id == 'a' - assert len(q) == 1 - op = q.peek() - assert op is not None - assert op.callback_id == 'b' - - second = q.dequeue() - assert second is not None - assert second.callback_id == 'b' - assert q.empty is True - - -def test_queue_deduplicates_only_against_last_item(): - q = OperationQueue() - - q.enqueue_lock_request('restart', {'x': 1}) - assert len(q) == 1 - - q.enqueue_lock_request('restart', {'x': 1}) - assert len(q) == 1 - - q.enqueue_lock_request('restart', {'x': 2}) - assert len(q) == 2 - - q.enqueue_lock_request('restart', {'x': 1}) - assert len(q) == 3 - - -def test_queue_to_string_and_from_string(): - q1 = OperationQueue() - q1.enqueue_lock_request('a', {'x': 1}, max_retry=5) - q1.enqueue_lock_request('b', {'y': 'z'}, max_retry=None) - - encoded = q1.to_string() - q2 = OperationQueue.from_string(encoded) - - assert len(q2) == 2 - op = q2.peek() - assert op is not None - assert op.callback_id == 'a' - - op = q2.dequeue() - assert op is not None - assert op.callback_id == 'a' - - op = q2.dequeue() - assert op is not None - assert op.callback_id == 'b' - assert q2.empty - - -def test_queue_from_string_empty_string_is_empty_queue(): - q = OperationQueue.from_string('') - assert q.empty - assert q.peek() is None - - -def test_queue_from_string_rejects_non_list_json(): - with pytest.raises(RollingOpsDecodingError, match='OperationQueue string'): - OperationQueue.from_string(json.dumps({'not': 'a list'})) - - -def test_queue_from_string_rejects_invalid_jason(): - with 
pytest.raises(RollingOpsDecodingError, match='Failed to deserialize data'): - OperationQueue.from_string('{invalid') - - -def test_queue_encoding_is_list_of_operation_strings(): - q = OperationQueue() - q.enqueue_lock_request('a', {'x': 1}) - s = q.to_string() - - decoded = json.loads(s) - assert isinstance(decoded, list) - assert len(decoded) == 1 # type: ignore[reportUnknownArgumentType] - assert isinstance(decoded[0], str) - - op_dicts = _decode_queue_string(s) - assert op_dicts[0]['callback_id'] == 'a' - assert op_dicts[0]['kwargs'] == '{"x":1}' - assert op_dicts[0].get('max_retry', '') == '' - assert 'requested_at' in op_dicts[0] diff --git a/rollingops/tests/unit/test_peer_rollingops_in_charm.py b/rollingops/tests/unit/test_peer_rollingops_in_charm.py index fab338e19..11389de47 100644 --- a/rollingops/tests/unit/test_peer_rollingops_in_charm.py +++ b/rollingops/tests/unit/test_peer_rollingops_in_charm.py @@ -15,22 +15,18 @@ # Learn more about testing at: https://juju.is/docs/sdk/testing -import logging from typing import Any +from unittest.mock import MagicMock import pytest from ops.testing import Context, PeerRelation, State from scenario import RawDataBagContents -from tests.unit.conftest import PeerRollingOpsCharm +from tests.unit.conftest import RollingOpsCharm -from charmlibs.rollingops._peer_models import ( - LockIntent, - OperationQueue, - RollingOpsInvalidLockRequestError, - _now_timestamp_str, -) - -logger = logging.getLogger(__name__) +from charmlibs.rollingops.common._exceptions import RollingOpsInvalidLockRequestError +from charmlibs.rollingops.common._models import Operation, OperationQueue +from charmlibs.rollingops.common._utils import now_timestamp +from charmlibs.rollingops.peer._models import LockIntent def _unit_databag(state: State, peer: PeerRelation) -> RawDataBagContents: @@ -45,18 +41,19 @@ def _make_operation_queue( callback_id: str, kwargs: dict[str, Any], max_retry: int | None ) -> OperationQueue: q = OperationQueue() - 
q.enqueue_lock_request(callback_id=callback_id, kwargs=kwargs, max_retry=max_retry) + op1 = Operation.create(callback_id=callback_id, kwargs=kwargs, max_retry=max_retry) + q.enqueue(op1) return q def test_lock_request_enqueues_and_sets_request( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], ): peer = PeerRelation(endpoint='restart') state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.action('restart', params={'delay': 10}), + state_out = ctx.run( + ctx.on.action('restart', params={'delay': 10}), state_in, ) @@ -79,14 +76,13 @@ def test_lock_request_enqueues_and_sets_request( [ (-5), (-1), - ('3'), ], ) -def test_lock_request_invalid_inputs(peer_ctx: Context[PeerRollingOpsCharm], max_retry: Any): +def test_lock_request_invalid_inputs(ctx: Context[RollingOpsCharm], max_retry: Any): peer = PeerRelation(endpoint='restart') state_in = State(leader=False, relations={peer}) - with peer_ctx(peer_ctx.on.update_status(), state_in) as mgr: + with ctx(ctx.on.update_status(), state_in) as mgr: with pytest.raises(RollingOpsInvalidLockRequestError): mgr.charm.restart_manager.request_async_lock( callback_id='_restart', @@ -103,13 +99,11 @@ def test_lock_request_invalid_inputs(peer_ctx: Context[PeerRollingOpsCharm], max ('unknown',), ], ) -def test_lock_request_invalid_callback_id( - peer_ctx: Context[PeerRollingOpsCharm], callback_id: str -): +def test_lock_request_invalid_callback_id(ctx: Context[RollingOpsCharm], callback_id: str): peer = PeerRelation(endpoint='restart') state_in = State(leader=False, relations={peer}) - with peer_ctx(peer_ctx.on.update_status(), state_in) as mgr: + with ctx(ctx.on.update_status(), state_in) as mgr: with pytest.raises(RollingOpsInvalidLockRequestError, match='Unknown callback_id'): mgr.charm.restart_manager.request_async_lock( callback_id=callback_id, @@ -126,11 +120,11 @@ def test_lock_request_invalid_callback_id( ({'x': OperationQueue()}), ], ) -def 
test_lock_request_invalid_kwargs(peer_ctx: Context[PeerRollingOpsCharm], kwargs: Any): +def test_lock_request_invalid_kwargs(ctx: Context[RollingOpsCharm], kwargs: Any): peer = PeerRelation(endpoint='restart') state_in = State(leader=False, relations={peer}) - with peer_ctx(peer_ctx.on.update_status(), state_in) as mgr: + with ctx(ctx.on.update_status(), state_in) as mgr: with pytest.raises( RollingOpsInvalidLockRequestError, match='Failed to create the lock request' ): @@ -141,7 +135,7 @@ def test_lock_request_invalid_kwargs(peer_ctx: Context[PeerRollingOpsCharm], kwa ) -def test_existing_operation_then_new_request(peer_ctx: Context[PeerRollingOpsCharm]): +def test_existing_operation_then_new_request(ctx: Context[RollingOpsCharm]): queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) peer = PeerRelation( endpoint='restart', @@ -150,7 +144,7 @@ def test_existing_operation_then_new_request(peer_ctx: Context[PeerRollingOpsCha state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.action('restart', params={'delay': 10}), state_in) + state_out = ctx.run(ctx.on.action('restart', params={'delay': 10}), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.REQUEST @@ -162,10 +156,10 @@ def test_existing_operation_then_new_request(peer_ctx: Context[PeerRollingOpsCha def test_new_request_does_not_overwrite_state_if_queue_not_empty( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], ): queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) - executed_at = _now_timestamp_str() + executed_at = str(now_timestamp().timestamp()) peer = PeerRelation( endpoint='restart', local_unit_data={ @@ -176,7 +170,7 @@ def test_new_request_does_not_overwrite_state_if_queue_not_empty( ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.action('restart', params={'delay': 10}), state_in) + state_out = 
ctx.run(ctx.on.action('restart', params={'delay': 10}), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.RETRY_RELEASE @@ -188,21 +182,22 @@ def test_new_request_does_not_overwrite_state_if_queue_not_empty( def test_relation_changed_without_grant_does_not_run_operation( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], ): - remote_unit_name = f'{peer_ctx.app_name}/1' + remote_unit_name = f'{ctx.app_name}/1' queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) peer = PeerRelation( endpoint='restart', local_unit_data={'state': LockIntent.REQUEST, 'operations': queue.to_string()}, - local_app_data={'granted_unit': remote_unit_name, 'granted_at': _now_timestamp_str()}, + local_app_data={ + 'granted_unit': remote_unit_name, + 'granted_at': str(now_timestamp().timestamp()), + }, ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.REQUEST @@ -211,20 +206,21 @@ def test_relation_changed_without_grant_does_not_run_operation( assert databag.get('executed_at', '') == '' -def test_lock_complete_pops_head(peer_ctx: Context[PeerRollingOpsCharm]): - remote_unit_name = f'{peer_ctx.app_name}/1' - local_unit_name = f'{peer_ctx.app_name}/0' +def test_lock_complete_pops_head(ctx: Context[RollingOpsCharm]): + remote_unit_name = f'{ctx.app_name}/1' + local_unit_name = f'{ctx.app_name}/0' queue = _make_operation_queue(callback_id='_restart', kwargs={}, max_retry=0) peer = PeerRelation( endpoint='restart', local_unit_data={'state': LockIntent.REQUEST, 'operations': queue.to_string()}, - local_app_data={'granted_unit': local_unit_name, 'granted_at': _now_timestamp_str()}, + local_app_data={ + 'granted_unit': 
local_unit_name, + 'granted_at': str(now_timestamp().timestamp()), + }, ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.IDLE @@ -236,25 +232,29 @@ def test_lock_complete_pops_head(peer_ctx: Context[PeerRollingOpsCharm]): def test_successful_operation_leaves_state_request_when_more_ops_remain( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], ): - local_unit_name = f'{peer_ctx.app_name}/0' - remote_unit_name = f'{peer_ctx.app_name}/1' + local_unit_name = f'{ctx.app_name}/0' + remote_unit_name = f'{ctx.app_name}/1' queue = OperationQueue() - queue.enqueue_lock_request(callback_id='_restart', kwargs={}, max_retry=None) - queue.enqueue_lock_request(callback_id='_failed_restart', kwargs={}, max_retry=None) + op1 = Operation.create(callback_id='_restart', kwargs={}, max_retry=None) + op2 = Operation.create(callback_id='_failed_restart', kwargs={}, max_retry=None) + + queue.enqueue(op1) + queue.enqueue(op2) peer = PeerRelation( endpoint='restart', local_unit_data={'state': LockIntent.REQUEST, 'operations': queue.to_string()}, - local_app_data={'granted_unit': local_unit_name, 'granted_at': _now_timestamp_str()}, + local_app_data={ + 'granted_unit': local_unit_name, + 'granted_at': str(now_timestamp().timestamp()), + }, ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.REQUEST @@ -273,23 +273,24 @@ def test_successful_operation_leaves_state_request_when_more_ops_remain( ], ) def 
test_lock_retry_marks_retry( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], callback_id: str, lock_intent: LockIntent, ): - remote_unit_name = f'{peer_ctx.app_name}/1' - local_unit_name = f'{peer_ctx.app_name}/0' + remote_unit_name = f'{ctx.app_name}/1' + local_unit_name = f'{ctx.app_name}/0' queue = _make_operation_queue(callback_id=callback_id, kwargs={}, max_retry=3) peer = PeerRelation( endpoint='restart', local_unit_data={'state': LockIntent.REQUEST, 'operations': queue.to_string()}, - local_app_data={'granted_unit': local_unit_name, 'granted_at': _now_timestamp_str()}, + local_app_data={ + 'granted_unit': local_unit_name, + 'granted_at': str(now_timestamp().timestamp()), + }, ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == lock_intent @@ -316,14 +317,15 @@ def test_lock_retry_marks_retry( ], ) def test_lock_retry_drops_when_max_retry_reached( - peer_ctx: Context[PeerRollingOpsCharm], + ctx: Context[RollingOpsCharm], callback_id: str, ): - remote_unit_name = f'{peer_ctx.app_name}/1' - local_unit_name = f'{peer_ctx.app_name}/0' + remote_unit_name = f'{ctx.app_name}/1' + local_unit_name = f'{ctx.app_name}/0' queue = OperationQueue() - queue.enqueue_lock_request(callback_id=callback_id, kwargs={}, max_retry=3) + op1 = Operation.create(callback_id=callback_id, kwargs={}, max_retry=3) + queue.enqueue(op1) op = queue.peek() assert op is not None op.increase_attempt() @@ -333,13 +335,14 @@ def test_lock_retry_drops_when_max_retry_reached( peer = PeerRelation( endpoint='restart', local_unit_data={'state': LockIntent.REQUEST, 'operations': queue.to_string()}, - local_app_data={'granted_unit': local_unit_name, 'granted_at': _now_timestamp_str()}, + local_app_data={ + 
'granted_unit': local_unit_name, + 'granted_at': str(now_timestamp().timestamp()), + }, ) state_in = State(leader=False, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _unit_databag(state_out, peer) assert databag['state'] == LockIntent.IDLE @@ -349,7 +352,11 @@ def test_lock_retry_drops_when_max_retry_reached( assert len(q) == 0 -def test_lock_grant_and_release(peer_ctx: Context[PeerRollingOpsCharm]): +def test_lock_grant_and_release( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) peer = PeerRelation( endpoint='restart', @@ -357,41 +364,47 @@ def test_lock_grant_and_release(peer_ctx: Context[PeerRollingOpsCharm]): ) state_in = State(leader=True, relations={peer}) - state = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state, peer) - unit_name = f'{peer_ctx.app_name}/1' + unit_name = f'{ctx.app_name}/1' assert unit_name in databag['granted_unit'] assert databag['granted_at'] is not None -def test_scheduling_does_nothing_if_lock_already_granted(peer_ctx: Context[PeerRollingOpsCharm]): +def test_scheduling_does_nothing_if_lock_already_granted( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) - remote_unit_name = f'{peer_ctx.app_name}/1' - now_timestamp = _now_timestamp_str() + remote_unit_name = f'{ctx.app_name}/1' + now_timestamp_str = str(now_timestamp().timestamp()) peer = PeerRelation( endpoint='restart', peers_data={ 1: {'state': LockIntent.REQUEST, 'operations': queue.to_string()}, 2: {'state': 
LockIntent.REQUEST, 'operations': queue.to_string()}, }, - local_app_data={'granted_unit': remote_unit_name, 'granted_at': now_timestamp}, + local_app_data={'granted_unit': remote_unit_name, 'granted_at': now_timestamp_str}, ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run( - peer_ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in - ) + state_out = ctx.run(ctx.on.relation_changed(peer, remote_unit=remote_unit_name), state_in) databag = _app_databag(state_out, peer) assert databag['granted_unit'] == remote_unit_name - assert databag['granted_at'] == now_timestamp + assert databag['granted_at'] == now_timestamp_str -def test_schedule_picks_retry_hold(peer_ctx: Context[PeerRollingOpsCharm]): - old_operation = _now_timestamp_str() +def test_schedule_picks_retry_hold( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): + old_operation = str(now_timestamp().timestamp()) queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) - new_operation = _now_timestamp_str() + new_operation = str(now_timestamp().timestamp()) peer = PeerRelation( endpoint='restart', @@ -415,19 +428,25 @@ def test_schedule_picks_retry_hold(peer_ctx: Context[PeerRollingOpsCharm]): ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state_out = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state_out, peer) - remote_unit_name = f'{peer_ctx.app_name}/3' + remote_unit_name = f'{ctx.app_name}/3' assert databag['granted_unit'] == remote_unit_name -def test_schedule_picks_oldest_requested_at_among_requests(peer_ctx: Context[PeerRollingOpsCharm]): +def test_schedule_picks_oldest_requested_at_among_requests( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): old_queue = OperationQueue() - 
old_queue.enqueue_lock_request(callback_id='restart', kwargs={}, max_retry=2) + old_op = Operation.create(callback_id='restart', kwargs={}, max_retry=2) + old_queue.enqueue(old_op) new_queue = OperationQueue() - new_queue.enqueue_lock_request(callback_id='restart', kwargs={}, max_retry=2) + new_op = Operation.create(callback_id='restart', kwargs={}, max_retry=2) + new_queue.enqueue(new_op) peer = PeerRelation( endpoint='restart', @@ -438,18 +457,20 @@ def test_schedule_picks_oldest_requested_at_among_requests(peer_ctx: Context[Pee ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state_out = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state_out, peer) - remote_unit_name = f'{peer_ctx.app_name}/2' + remote_unit_name = f'{ctx.app_name}/2' assert databag['granted_unit'] == remote_unit_name def test_schedule_picks_oldest_executed_at_among_retries_when_no_requests( - peer_ctx: Context[PeerRollingOpsCharm], + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], ): - old_operation = _now_timestamp_str() + old_operation = str(now_timestamp().timestamp()) queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) - new_operation = _now_timestamp_str() + new_operation = str(now_timestamp().timestamp()) peer = PeerRelation( endpoint='restart', @@ -468,14 +489,18 @@ def test_schedule_picks_oldest_executed_at_among_retries_when_no_requests( ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state_out = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state_out, peer) - remote_unit_name = f'{peer_ctx.app_name}/2' + remote_unit_name = f'{ctx.app_name}/2' assert databag['granted_unit'] == remote_unit_name -def test_schedule_prioritizes_requests_over_retries(peer_ctx: Context[PeerRollingOpsCharm]): +def 
test_schedule_prioritizes_requests_over_retries( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): queue = _make_operation_queue(callback_id='_failed_restart', kwargs={}, max_retry=3) peer = PeerRelation( @@ -484,28 +509,32 @@ def test_schedule_prioritizes_requests_over_retries(peer_ctx: Context[PeerRollin 1: { 'state': LockIntent.RETRY_RELEASE, 'operations': queue.to_string(), - 'executed_at': _now_timestamp_str(), + 'executed_at': str(now_timestamp().timestamp()), }, 2: {'state': LockIntent.REQUEST, 'operations': queue.to_string()}, }, ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state_out = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state_out, peer) - remote_unit_name = f'{peer_ctx.app_name}/2' + remote_unit_name = f'{ctx.app_name}/2' assert databag['granted_unit'] == remote_unit_name -def test_no_unit_is_granted_if_there_are_no_requests(peer_ctx: Context[PeerRollingOpsCharm]): +def test_no_unit_is_granted_if_there_are_no_requests( + certificates_manager_patches: dict[str, MagicMock], + etcdctl_patch: MagicMock, + ctx: Context[RollingOpsCharm], +): peer = PeerRelation( endpoint='restart', peers_data={1: {'state': LockIntent.IDLE}, 2: {'state': LockIntent.IDLE}}, ) state_in = State(leader=True, relations={peer}) - state_out = peer_ctx.run(peer_ctx.on.leader_elected(), state_in) + state_out = ctx.run(ctx.on.leader_elected(), state_in) databag = _app_databag(state_out, peer) assert databag.get('granted_unit', '') == '' diff --git a/rollingops/uv.lock b/rollingops/uv.lock index e6833c191..b5d0d3651 100644 --- a/rollingops/uv.lock +++ b/rollingops/uv.lock @@ -114,6 +114,8 @@ dependencies = [ { name = "charmlibs-pathops" }, { name = "dpcharmlibs-interfaces" }, { name = "ops" }, + { name = "pydantic" }, + { name = "shortuuid" }, { name = "tenacity" }, ] @@ -133,6 +135,8 @@ requires-dist = [ { name = 
"charmlibs-pathops", specifier = ">=1.2.1" }, { name = "dpcharmlibs-interfaces", specifier = "==1.0.0" }, { name = "ops" }, + { name = "pydantic", specifier = ">=2.12.5" }, + { name = "shortuuid", specifier = ">=1.0.13" }, { name = "tenacity" }, ] @@ -423,6 +427,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "shortuuid" +version = "1.0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e2/bcf761f3bff95856203f9559baf3741c416071dd200c0fc19fad7f078f86/shortuuid-1.0.13.tar.gz", hash = "sha256:3bb9cf07f606260584b1df46399c0b87dd84773e7b25912b7e391e30797c5e72", size = 9662, upload-time = "2024-03-11T20:11:06.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/44/21d6bf170bf40b41396480d8d49ad640bca3f2b02139cd52aa1e272830a5/shortuuid-1.0.13-py3-none-any.whl", hash = "sha256:a482a497300b49b4953e15108a7913244e1bb0d41f9d332f5e9925dba33a3c5a", size = 10529, upload-time = "2024-03-11T20:11:04.807Z" }, +] + [[package]] name = "tenacity" version = "9.1.4"